changeset 12081:58be5a182473 octave-forge

Return requested range rather than entire sheet data
author prnienhuis
date Sun, 29 Sep 2013 21:06:31 +0000
parents cc8659fdf2cf
children b0cd76c3a3fd
files main/io/inst/private/__OCT_ods2oct__.m
diffstat 1 files changed, 67 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/main/io/inst/private/__OCT_ods2oct__.m	Sun Sep 29 21:05:57 2013 +0000
+++ b/main/io/inst/private/__OCT_ods2oct__.m	Sun Sep 29 21:06:31 2013 +0000
@@ -32,33 +32,33 @@
 ## 2013-09-23 Renamed to __OCT_ods2oct__.m
 ## 2013-09-29 Use values between <text> tags only for strings & dates/times
 ##     ''     Return date values as Octave datenums (doubles)
+##     ''     Return requested range rather than entire sheet data
 
-function [ rawarr, xls, rstatus] = __OCT_ods2oct__ (xls, wsh, cellrange='', spsh_opts)
+function [ rawarr, ods, rstatus] = __OCT_ods2oct__ (ods, wsh, cellrange='', spsh_opts)
 
   rstatus = 0;
 
   ## Check if requested worksheet exists in the file & if so, get sheet
   if (isnumeric (wsh))
-    if (wsh > numel (xls.sheets.sh_names) || wsh < 1)
-      error ("ods2oct: sheet number (%d) out of range (1 - %d)", wsh, numel (xls.sheets.sh_names));
+    if (wsh > numel (ods.sheets.sh_names) || wsh < 1)
+      error ("ods2oct: sheet number (%d) out of range (1 - %d)", wsh, numel (ods.sheets.sh_names));
     endif
   elseif (ischar (wsh))
     idx = strmatch (wsh, ods.sheets.sh_names);
     if (isempty (idx))
-      error ("ods2oct: sheet '%s' not found in file %s", wsh, xls.filename);
+      error ("ods2oct: sheet '%s' not found in file %s", wsh, ods.filename);
     endif
     wsh = idx;
   endif
-  sheet = xls.workbook(xls.sheets.shtidx(wsh):xls.sheets.shtidx(wsh+1));
+  sheet = ods.workbook(ods.sheets.shtidx(wsh):ods.sheets.shtidx(wsh+1));
 
   ## Check ranges
-  [ firstrow, lastrow, lcol, rcol ] = getusedrange (xls, wsh);
-  ## FIXME first row & left col always 1
+  [ firstrow, lastrow, lcol, rcol ] = getusedrange (ods, wsh);
   if (isempty (cellrange))
     if (firstrow == 0 && lastrow == 0)
       ## Empty sheet
       rawarr = {};
-      printf ("Worksheet '%s' contains no data\n", xls.sheets.sh_names{wsh});
+      printf ("Worksheet '%s' contains no data\n", ods.sheets.sh_names{wsh});
       rstatus = 1;
       return;
     else
@@ -68,42 +68,53 @@
   else
     [topleft, nrows, ncols, firstrow, lcol] = parse_sp_range (cellrange);
     ## Check if requested range exists
+    if (firstrow > lastrow || lcol > rcol)
+      ## Out of occupied range
+      warning ("ods2oct: requested range outside occupied range");
+      rawarr = {};
+      ods.limits = [];
+      return
+    endif
     lastrow = min (lastrow, firstrow + nrows - 1);
     rcol = min (rcol, lcol + ncols - 1);
   endif
 
-  rawarr = cell (nrows, rcol);
+  ## FIXME Preallocation + data reading can be more efficient
+  ## Preallocate output array; provisionally assign max nr. of rows & columns
+  rawarr = cell (lastrow, rcol);
 
   ## Get data
-  re = 1;
-  ii = 0;
+  re = 1;                       # Start table-row search at first char of sheet
+  irow = 0;                     # Counts "real" spreadsheet rows
   trow = " ";
   ## Row index ii below does not necessarily match table-rows!
-  while (ii < nrows && (! isempty (trow)))
+  while (irow < lastrow && (! isempty (trow)))
     ## Get next table-row
     [trow, ~, re] = getxmlnode (sheet, "table:table-row", re);
 
     if (! isempty (trow))
       ## Check if table-row has any data
       datrow = index (trow, " office:");
+      ## Check repeat status and update row counter
+      reprow = str2double (getxmlattv (trow, "table:number-rows-repeated"));
+      if (isfinite (reprow))
+        reprow = min (reprow, lastrow - irow);
+      endif
+      irow++;
 
       ## Only process table-row contents if it has any data. Skip upper
       ## empty table-rows (that's why we need an OR), only start counting
       ## with the first table-row containing data
-      if (datrow || ii)
-        ++ii;
-        ## Check repeat status
-        reprow = str2double (getxmlattv (trow, "table:number-rows-repeated"));
-        ce = 0;
-        jj = 0;
+      if (datrow || irow)
+
+        ce = 0;                 # Char pointer on table-row
         tcell = " ";
-        ## Column index jj below does not necessarily match table-cells!
-        while (jj < rcol && (! isempty (tcell)))
-          ++jj;
-
+        icol = 0;               # Count spreadsheet column
+        while (icol < rcol && (! isempty (tcell)))
           ## Get next table-cell. First see if it is covered (merged)
           [tcell1, ~, ce1] = getxmlnode (trow, "table:covered-table-cell", ce+1);
           [tcell2, ~, ce2] = getxmlnode (trow, "table:table-cell", ce+1);
+
           if (ce1 > 0 && ce2 > 0)
             ## Both  table-cell and a table-covered-cell are present
             if (ce1 < ce2)
@@ -128,10 +139,14 @@
               tcell = tcell2;
             endif
           endif
+          ## First check its repeat status and update column counter
+          repcol = str2double (getxmlattv (tcell, "table:number-columns-repeated"));
+          if (isfinite (repcol))
+            repcol = min (repcol, rcol - icol);
+          endif
+          icol++;
 
           if (! isempty (tcell))
-            ## First check its repeat status
-            repcol = str2double (getxmlattv (tcell, "table:number-columns-repeated"));
             ## Try to get value type
             ctype = '';
             if (ce2)
@@ -157,32 +172,32 @@
                   form = regexprep (form, '\[\.(\w+)\]', '$1');
                   form = regexprep (form, '\[\.(\w+):', '$1:');
                   form = regexprep (form, ':\.(\w+)\]', ':$1');
-                  rawarr{ii, jj} = form;
+                  rawarr{irow, icol} = form;
                 case "float"
                   ## Watch out for error values. If so, <text> has #VALUE and office:value = 0
                   if (isfinite (str2double (ctvalue)))
-                    rawarr{ii, jj} = str2double (getxmlattv (tcell, "office:value"));
+                    rawarr{irow, icol} = str2double (getxmlattv (tcell, "office:value"));
                   else
-                    rawarr{ii, jj} = NaN;
+                    rawarr{irow, icol} = NaN;
                   endif
                 case "percentage"
                   ## Watch out for error values. If so, <text> has #VALUE and office:value = 0
                   ctvalue = ctvalue (1:end-1);
                   if (isfinite (str2double (ctvalue)))
-                    rawarr{ii, jj} = str2double (getxmlattv (tcell, "office:value"));
+                    rawarr{irow, icol} = str2double (getxmlattv (tcell, "office:value"));
                   else
-                    rawarr{ii, jj} = NaN;
+                    rawarr{irow, icol} = NaN;
                   endif
                 case "currency"
                   ## Watch out for error values. If so, <text> has #VALUE and office:value = 0
                   idx = regexp (ctvalue, '[\d.\d]');
                   if (isempty (idx))
-                    rawarr{ii, jj} = NaN;
+                    rawarr{irow, icol} = NaN;
                   else
-                    rawarr{ii, jj} = str2double (getxmlattv (tcell, "office:value"));
+                    rawarr{irow, icol} = str2double (getxmlattv (tcell, "office:value"));
                   endif
                 case "string"
-                  rawarr{ii, jj} = ctvalue;
+                  rawarr{irow, icol} = ctvalue;
                 case "date"
                   cvalue = getxmlattv (tcell, "office:date-value");
                   try
@@ -192,17 +207,17 @@
                     yr = str2double (cv(1));
                     mo = str2double (cv(2));
                     dy = str2double (cv(3));
-                    rawarr{ii, jj} = datenum(yr, mo, dy) + 693960;
+                    rawarr{irow, icol} = datenum(yr, mo, dy) + 693960;
                     ## Time part, if any (that's what the try-catch is for)
                     cv = regexp (cvalue{2}, '[0-9]*', "match");
                     hh = str2double (cv(1));
                     mm = str2double (cv(2));
                     ss = str2double (cv(3));
-                    rawarr{ii, jj} += datenum (0, 0, 0, hh, mm, ss);
+                    rawarr{irow, icol} += datenum (0, 0, 0, hh, mm, ss);
                   catch
                   end_try_catch
                 case "boolean"
-                  rawarr{ii, jj} = strcmpi (ctvalue, "true");
+                  rawarr{irow, icol} = strcmpi (ctvalue, "true");
                 case "time"
                   ## Time values usually have hours first, then minutes, optionally seconds
                   hh = mi = ss = 0;
@@ -214,26 +229,27 @@
                     ss = str2double (ctvalue(3));
                   catch
                   end_try_catch
-                  rawarr{ii, jj} = datenum (0, 0, 0, hh, mi, ss);
+                  rawarr{irow, icol} = datenum (0, 0, 0, hh, mi, ss);
                 otherwise
                   ## Do nothing
               endswitch
             endif
+
             ## Copy cell contents for repeated columns & bump column counter
-            if (isfinite (repcol))
-              rawarr(ii, jj+1:jj+repcol-1) = rawarr(ii, jj);
-              jj += repcol - 1;
+            if (isfinite (repcol) && icol < rcol)
+              rawarr(irow, icol+1:icol+repcol-1) = rawarr(irow, icol);
+              icol += repcol;
               repcol = '';
             endif
           endif
         endwhile
 
         ## Copy row contents to repeated rows & bump row counter
-        if (isfinite (reprow))
-          for kk=ii+1:min (nrows, ii+reprow-1)
-            rawarr(kk, :) = rawarr(ii, :);
+        if (isfinite (reprow) && irow < lastrow)
+          for kk=irow+1:min (nrows, irow+reprow-1)
+            rawarr(kk, :) = rawarr(irow, :);
           endfor
-          ii += reprow - 1;
+          irow += reprow;
           reprow = '';
         endif
       endif
@@ -241,13 +257,16 @@
 
   endwhile
 
-  ## If required strip leftmost empty columns
+  ## If required strip leftmost empty columns/topmost empty rows
   if (lcol > 1)
-    rawarr (:, 1:ncols) = rawarr (:, lcol:rcol);
-    rawarr (:, ncols+1:end) = [];
+    rawarr(:, 1:ncols) = rawarr(:, lcol:rcol);
+    rawarr(:, ncols+1:end) = [];
   endif
-
+  if (firstrow > 1)
+    rawarr(1:nrows, :) = rawarr(firstrow:lastrow, :);
+    rawarr(nrows+1:end, :) = [];
+  endif
   ## Keep track of data rectangle limits
-  xls.limits = [1, ncols; 1, nrows];
+  ods.limits = [lcol, rcol; firstrow, lastrow];
 
 endfunction