changeset 12225:0504f7cafe76 octave-forge

that's why an XML parser is superior over regular expressions); replace fgetl calls by fread to cope with EOLs
author prnienhuis
date Fri, 20 Dec 2013 13:07:16 +0000
parents f376ed0cff8d
children 0117d73458d7
files main/io/inst/private/__OCT_getusedrange__.m
diffstat 1 files changed, 21 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/main/io/inst/private/__OCT_getusedrange__.m	Fri Dec 20 13:06:33 2013 +0000
+++ b/main/io/inst/private/__OCT_getusedrange__.m	Fri Dec 20 13:07:16 2013 +0000
@@ -30,7 +30,8 @@
 ## 2013-09-27 Re-use old jOpenDocument code; may be slow but it is well-tested
 ## 2013-10-01 Gnumeric subfunction added
 ## 2013-11-03 Fix wrong variable name "xml"->"sheet" in __OCT_ods_getusedrange__ 
-## 2013-11-16 Replace fgetl calls by fread to cope with EOLs
+## 2013-11-16 Replace fgetl calls by fread to cope with EOLs
+## 2013-12-14 (OOXML) Insert scanning to circumvent faulty "A1" range from POI
 
 function [ trow, brow, lcol, rcol ] = __OCT_getusedrange__ (spptr, ii)
 
@@ -60,21 +61,34 @@
   trow = brow = lcol = rcol = 0;
 
   ## Read first part of raw worksheet
-  rawsheet = fopen (sprintf ('%s/xl/worksheets/sheet%d.xml', spptr.workbook, ii));
-  if (rawsheet > 0)
+  fid = fopen (sprintf ('%s/xl/worksheets/sheet%d.xml', spptr.workbook, ii));
+  if (fid > 0)
     xml = fread (fid, 512, "char=>char").';  ## Occupied range is in first 512 bytes
-    fclose (rawsheet);
+    fclose (fid);
   else
     ## We know the number must be good => apparently tmpdir is damaged or it has gone
     error ("getusedrange: sheet number nonexistent or corrupted file pointer struct");
   endif
 
   node = getxmlnode (xml, "dimension");
-  crange = getxmlattv (node, "ref");
-
+  crange = getxmlattv (node, "ref");
+  if (strcmpi (crange, "A1"))
+    ## Looks like it has been written by POI OOXML. We need a better guess
+    ## 1. Re-read entire worksheet
+    fid = fopen (sprintf ('%s/xl/worksheets/sheet%d.xml', spptr.workbook, ii));
+    xml = fread (fid, Inf, "char=>char").';
+    fclose (fid);
+    ## 2. Scan for cell addresses
+    addr = cell2mat (regexp (xml, '<c r="(\w+)?"', "tokens"));
+    ## 3. Split in rows & columns, then sort
+    rows = sort (str2double (cell2mat (regexp (addr, '\d+', "match"))));
+    cols = sort (strjust (char (cell2mat (regexp (addr, '\D*', "match")))));
+    ## 4. Extract range by taking outer row/col limits
+    crange = sprintf ("%s%d:%s%d", cols(1), rows(1), cols(end), rows(end));
+  endif
   [~, nrows, ncols, trow, lcol] = parse_sp_range (crange);
   brow = trow + nrows - 1;
-  rcol = lcol + ncols - 1;
+  rcol = lcol + ncols - 1;
 
 endfunction