changeset 12063:c93f5089bda2 octave-forge

New files for experimental OCT (native Octave) interface
author prnienhuis
date Fri, 27 Sep 2013 17:59:58 +0000
parents 8e1f276b1202
children 07169c2596f2
files main/io/inst/private/__OCT_getusedrange__.m main/io/inst/private/__OCT_ods2oct__.m main/io/inst/private/__OCT_spsh_close__.m main/io/inst/private/__OCT_spsh_info__.m main/io/inst/private/__OCT_spsh_open__.m
diffstat 5 files changed, 603 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/io/inst/private/__OCT_getusedrange__.m	Fri Sep 27 17:59:58 2013 +0000
@@ -0,0 +1,211 @@
+## Copyright (C) 2013 Philip Nienhuis
+## 
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+## 
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+## 
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*- 
+## @deftypefn {Function File} {[@var{trow}, @var{brow}, @var{lcol}, @var{rcol}] =} __OCT_getusedrange__ (@var{spptr}, @var{ii})
+## Get leftmost and rightmost occupied column numbers, and topmost and
+## lowermost occupied row numbers (base 1) of sheet number @var{ii}.
+##
+## @seealso{}
+## @end deftypefn
+
+## Author: Philip Nienhuis <prnienhuis at users.sf.net>
+## Created: 2013-09-08
+## Updates:
+## 2013-09-23 Prepared for adding in OOXML
+## 2013-09-26 Improved code to skip last empty columns in column count
+
+function [ trow, brow, lcol, rcol ] = __OCT_getusedrange__ (spptr, ii)
+
+  ## Dispatch to the ODS or OOXML range finder based on the extension of
+  ## spptr.filename; all four outputs are passed through unchanged.
+  ##
+  ##   spptr  spreadsheet pointer struct (spptr.filename is read here)
+  ##   ii     sheet index, handed on to the format-specific routine
+  ##
+  ## fileparts is used rather than indexing filename(end-3:end) so that a
+  ## filename shorter than four characters cannot raise an index error.
+  [~, ~, fext] = fileparts (spptr.filename);
+
+  if (strcmpi (fext, ".ods"))
+    [ trow, brow, lcol, rcol ] = __OCT_ods_getusedrange__ (spptr, ii);
+  else
+    ## Anything that is not .ods is treated as OOXML
+    [ trow, brow, lcol, rcol ] = __OCT_xlsx_getusedrange__ (spptr, ii);
+  endif
+
+endfunction
+
+
+##=============================OOXML========================
+function [ trow, brow, lcol, rcol ] = __OCT_xlsx_getusedrange__ (spptr, ii)
+
+  ## Stub for OOXML (.xlsx) files: neither input is inspected and every
+  ## range limit is reported as 0.
+  ## FIXME OOXML stuff (.xlsx) here
+  [trow, brow, lcol, rcol] = deal (0);
+
+endfunction
+
+##==============================ODS=========================
+function [ trow, brow, lcol, rcol ] = __OCT_ods_getusedrange__ (spptr, ii)
+
+  ## Determine the occupied cell range of sheet number II of an ODS
+  ## spreadsheet by scanning the raw XML text of that sheet.
+  ##
+  ## Inputs:
+  ##   spptr  spreadsheet pointer struct; this function reads
+  ##          spptr.workbook (XML text), spptr.sheets.sh_names and
+  ##          spptr.sheets.shtidx (start index of each sheet in workbook)
+  ##   ii     sheet index (base 1)
+  ## Outputs:
+  ##   trow, brow  topmost and lowermost occupied row numbers
+  ##   lcol, rcol  leftmost and rightmost occupied column numbers
+  ##   All four are 0 if the sheet text contains no " office:" substring.
+  ##
+  ## Raises an error if II is outside 1 .. number of sheets.
+
+  trow = brow = lcol = rcol = 0;
+
+  ## Check input (lower bound too, otherwise shtidx(ii) fails obscurely)
+  nsheets = numel (spptr.sheets.sh_names);
+  if (ii < 1 || ii > nsheets)
+    error ("getusedrange: sheet index (%d) out of range (1 - %d)", ii, nsheets);
+  endif
+
+  ## Get requested sheet
+  sheet = spptr.workbook(spptr.sheets.shtidx(ii):spptr.sheets.shtidx(ii+1)-1);
+
+  ## Check if sheet contains any cell content at all
+  ## FIXME: in far-fetched cases, cell string content may contain ' office:' too
+  if (! index (sheet, " office:"))
+    return
+  endif
+
+  ## Assess number of spreadsheet rows out of table-rows. The sheet length
+  ## is appended so that rowidx(k):rowidx(k+1) also works for the last row
+  rowidx = [strfind(sheet, "<table:table-row") length(sheet)];
+  nrows1 = nrows = numel (rowidx) - 1;
+  ## Get and count repeated table-row occurrences
+  reprows = strfind (sheet, "table:number-rows-repeated");
+  ## Repeat counts of empty rows are held back in repcnt1 until a later
+  ## table-row with data shows they are not trailing filler
+  repcnt1 = 0;
+  ## Find which rows contain rep counts. A separate loop variable is used
+  ## so the sheet index argument ii is not overwritten
+  for kk = 1:numel (reprows)
+    ## Last table-row starting before this attribute
+    irow = find (reprows(kk) > rowidx)(end);
+    tblrow = sheet(rowidx(irow):rowidx(irow+1));
+    repcnt = str2double (getxmlattv (tblrow, "table:number-rows-repeated")) - 1;
+    ## Add repcount to temporary counter
+    repcnt1 += repcnt;
+    ## Check if this row contains any data
+    if (index (tblrow, "office:"))
+      ## If yes, add repcounter to nrows and reset it
+      nrows += repcnt1;
+      repcnt1 = 0;
+    else
+      ## If no, check if there's data in the next table-row
+      tblrow = getxmlnode (sheet(rowidx(irow+1):end), "table:table-row");
+      if (! isempty (tblrow))
+        if (index (tblrow, "office:"))
+          ## If yes, add repcounter to nrows and reset it
+          nrows += repcnt1;
+          repcnt1 = 0;
+        endif
+      endif
+    endif
+  endfor
+
+  ## Set spreadsheet upper data row and count columns.
+  ## ncol == 0 doubles as the "column count not yet established" flag
+  ncol = 0;
+  re = 1;
+  for jj=1:nrows1
+    ## Get a table row
+    [trow1, ~, re] = getxmlnode (sheet, "table:table-row", re);
+
+    ## Prepare to count columns in row
+    tcell = " ";
+    tcidx = 1;
+
+    ## Find top row index. Only for first table-row, check if empty
+    if (! ncol)
+      if (index (trow1, "office:"))
+        trow = 1;
+      else
+        ## Apparently a placeholder table-row
+        repcnt = str2double (getxmlattv (trow1, "table:number-rows-repeated"));
+        if (isfinite (repcnt))
+          ## First row with data is below this table row
+          trow = repcnt + 1;
+        else
+          ## Upper table row is a single row
+          trow = 2;
+        endif
+      endif
+
+      ## Explore number of columns in row (should match that of entire sheet)
+      ## Older OOo versions fill the entire width with nr-cols-repeated attrib.
+      ## Empty cells are tallied in emptycols and only added to ncol once a
+      ## later cell with data is met, so trailing empty columns are skipped.
+      ## The tally must survive across cells, hence it is initialized once
+      ## here rather than on every pass through the loop.
+      emptycols = 0;
+      while (! isempty (tcell))
+        ## Try to get next table-cell
+        [tcell, ~, tcidx] = getxmlnode (trow1, "table:table-cell", tcidx);
+        if (! isempty (tcell))
+          repcolatt = getxmlattv (tcell, "table:number-columns-repeated");
+          ## str2double yields NaN when the attribute is absent
+          repcol = str2double (repcolatt);
+          if (isfinite (repcol))
+            ## Cell spans repcol columns
+            emptycols += repcol;
+            ## Check if cell has data
+            if (index (tcell, " office:"))
+              ncol += emptycols;
+              emptycols = 0;
+            endif
+          else
+            ## Single (non-repeated) cell
+            if (index (tcell, " office:"))
+              ncol += 1 + emptycols;
+              emptycols = 0;
+            else
+              emptycols++;
+            endif
+          endif
+        endif
+      endwhile
+      if (ncol)
+        lcol = ncol;
+      else
+        ## No data cell in this row: fall back on the repeat count of the
+        ## last table-cell that was found.
+        ## NOTE(review): repcol is NaN here if that cell had no repeat
+        ## attribute, and unset if the row had no table-cell at all - confirm
+        ## whether such rows can occur.
+        lcol = ncol = repcol;
+      endif
+
+    ## For subsequent rows, just check if 1st & last tcell contain repcols
+    ## On older OOo versions an empty top row may have 1024 cols, and ncol = 0
+    else
+      ## Get indices of all table-cells
+      tcidx = regexp (trow1, '<table:table-cell', "start");
+      ## Leftmost table-cell
+      tcell = getxmlnode (trow1, "table:table-cell");
+      repcol = str2double (getxmlattv (tcell, "table:number-columns-repeated"));
+      if (index (tcell, " office:"))
+        ## Yes. Leftmost table-cell contains data
+        lcol = 1; 
+      else
+        if (isfinite (repcol))
+          lcol = min (lcol, repcol + 1);
+        else
+          lcol = 2;
+        endif
+      endif
+      ## Rightmost table-cell, if row contains more than one tcell
+      if (numel (tcidx) > 1)
+        rc = str2double (getxmlattv (trow1(tcidx(end):end), "table:number-columns-repeated"));
+        if (! isfinite (rc))
+          ## Check for data content
+          if (index (trow1(tcidx(end):end), " office:value"))
+            ## NOTE(review): min() can only shrink the column count found
+            ## for the first row - confirm this is intended
+            ncol = min (ncol, numel (tcidx));
+          endif
+        endif
+        ##
+      endif
+    endif
+
+    
+    ## Check if last table-row contains any data
+    if (jj == nrows1)
+      if (! index (trow1, " office:"))
+        nrows -= 1;
+      endif
+    endif
+
+  endfor
+
+  ## Right column and bottom row are only reported if columns were found
+  if (ncol > 0)
+    rcol = ncol;
+    brow = nrows;
+  endif
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/io/inst/private/__OCT_ods2oct__.m	Fri Sep 27 17:59:58 2013 +0000
@@ -0,0 +1,222 @@
+## Copyright (C) 2013 Philip Nienhuis
+## 
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+## 
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+## 
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*- 
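+## @deftypefn {Function File} {[@var{rawarr}, @var{xls}, @var{rstatus}] =} __OCT_ods2oct__ (@var{xls}, @var{wsh}, @var{cellrange}, @var{spsh_opts})
+## Internal function: read raw cell data from worksheet @var{wsh} of an
+## ODS file opened with the native Octave (OCT) interface.
+##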
+## @seealso{}
+## @end deftypefn
+
+## Author: Philip Nienhuis <prnienhuis at users.sf.net>
+## Created: 2013-09-08
+## Updates:
+## 2013-09-09 Fix getxmlnode call
+##     ''     boolean, percentage & currency tags properly handled
+## 2013-09-11 Properly skip empty upper table-rows
+##     ''     Return dates as strings to avoid misinterpreting plethora of formats
+##     ''     Try-catch construct for time values
+##     ''     Formula reading support
+## 2013-09-23 Renamed to __OCT_ods2oct__.m
+
+function [ rawarr, xls, rstatus] = __OCT_ods2oct__ (xls, wsh, cellrange='', spsh_opts)
+
+  ## Read raw cell contents of one worksheet from the in-memory XML of an
+  ## ODS file.
+  ##
+  ## Inputs:
+  ##   xls        spreadsheet pointer struct; reads xls.workbook,
+  ##              xls.sheets.sh_names, xls.sheets.shtidx and xls.filename
+  ##   wsh        sheet number (base 1) or sheet name
+  ##   cellrange  spreadsheet range string; empty means the used range
+  ##   spsh_opts  options struct; spsh_opts.formulas_as_text is read here
+  ## Outputs:
+  ##   rawarr     cell array with the cell values
+  ##   xls        input struct with field "limits" added
+  ##   rstatus    0, except 1 when the sheet turns out to be empty
+  ##              NOTE(review): confirm that 0 is the intended status for
+  ##              a successful read
+  ##
+  ## Raises an error if the sheet number is out of range or the sheet name
+  ## is not found.
+
+  rstatus = 0;
+
+  ## Check if requested worksheet exists in the file & if so, get sheet
+  if (isnumeric (wsh))
+    if (wsh > numel (xls.sheets.sh_names) || wsh < 1)
+      error ("ods2oct: sheet number (%d) out of range (1 - %d)", wsh, numel (xls.sheets.sh_names));
+    endif
+  elseif (ischar (wsh))
+    ## Exact match on the sheet name; the first hit is taken so that wsh
+    ## always ends up as a scalar index
+    idx = find (strcmp (wsh, xls.sheets.sh_names), 1);
+    if (isempty (idx))
+      error ("ods2oct: sheet '%s' not found in file %s", wsh, xls.filename);
+    endif
+    wsh = idx;
+  endif
+  sheet = xls.workbook(xls.sheets.shtidx(wsh):xls.sheets.shtidx(wsh+1));
+
+  ## Check ranges
+  [ firstrow, lastrow, lcol, rcol ] = getusedrange (xls, wsh);
+  ## FIXME first row & left col always 1
+  if (isempty (cellrange))
+    if (firstrow == 0 && lastrow == 0)
+      ## Empty sheet
+      rawarr = {};
+      printf ("Worksheet '%s' contains no data\n", xls.sheets.sh_names{wsh});
+      rstatus = 1;
+      return;
+    else
+      nrows = lastrow - firstrow + 1;
+      ncols = rcol - lcol + 1;
+    endif
+  else
+    [topleft, nrows, ncols, firstrow, lcol] = parse_sp_range (cellrange);
+    ## Check if requested range exists
+    lastrow = min (lastrow, firstrow + nrows - 1);
+    rcol = min (rcol, lcol + ncols - 1);
+  endif
+
+  ## Columns 1..rcol are read; leftmost empty columns are stripped at the end
+  rawarr = cell (nrows, rcol);
+
+  ## Get data
+  re = 1;
+  ii = 0;
+  trow = " ";
+  ## Row index ii below does not necessarily match table-rows!
+  while (ii < lastrow && (! isempty (trow)))
+    ## Get next table-row
+    [trow, ~, re] = getxmlnode (sheet, "table:table-row", re);
+
+    if (! isempty (trow))
+      ## Check if table-row has any data
+      datrow = index (trow, " office:");
+
+      ## Only process table-row contents if it has any data. Skip upper
+      ## empty table-rows (that's why we need an OR), only start counting
+      ## with the first table-row containing data
+      if (datrow || ii)
+        ++ii;
+        ## Check repeat status (NaN if the attribute is absent)
+        reprow = str2double (getxmlattv (trow, "table:number-rows-repeated"));
+        ce = 0;
+        jj = 0;
+        tcell = " ";
+        ## Column index jj below does not necessarily match table-cells!
+        while (jj < rcol && (! isempty (tcell)))
+          ++jj;
+
+          ## Get next table-cell. First see if it is covered.
+          [tcell1, ~, ce1] = getxmlnode (trow, "table:covered-table-cell", ce+1);
+          [tcell2, ~, ce2] = getxmlnode (trow, "table:table-cell", ce+1);
+          if (ce1 > 0 && ce2 > 0)
+            ## Both  table-cell and a table-covered-cell are present
+            if (ce1 < ce2)
+              ## table-covered cell before table-cell. Set pointer at its end
+              ce = ce1;
+              tcell = tcell1;
+              ## Signal code below that content parsing must be skipped
+              ce2 = 0;
+            else
+              ## table-cell before table-covered cell. Pointer to end of table-cell
+              ce = ce2;
+              tcell = tcell2;
+            endif
+          else
+            if (ce1 > 0)
+              ## Only table-covered-cell found
+              ce = ce1;
+              tcell = tcell1;
+            else
+              ## Only table-cell found
+              ce = ce2;
+              tcell = tcell2;
+            endif
+          endif
+
+          if (! isempty (tcell))
+            ## First check its repeat status
+            repcol = str2double (getxmlattv (tcell, "table:number-columns-repeated"));
+            ## Try to get value type
+            ctype = '';
+            if (ce2)
+              ctype = getxmlattv (tcell, "office:value-type");
+            endif
+            if (! isempty (ctype))
+              if (spsh_opts.formulas_as_text)
+                form = getxmlattv (tcell, "table:formula");
+                if (! isempty (form))
+                  ctype = "cformula";
+                endif
+              endif
+              ## Get value: strip 8 characters from the front and 9 from the
+              ## end of the text:p node.
+              ## NOTE(review): presumably these are the bare <text:p> and
+              ## </text:p> tags - confirm what getxmlnode returns.
+              cvalue = getxmlnode (tcell, "text:p")(9:end-9);
+              ## Put proper translation into rawarr
+              switch ctype
+                case "cformula"
+                  form = strrep (form(4:end), "&quot;", '"');
+                  form = strrep (form, "&lt;", "<");
+                  form = strrep (form, "&gt;", ">");
+                  form = strrep (form, "&amp;", "&");
+                  ## Pimp ranges in formulas
+                  form = regexprep (form, '\[\.(\w+)\]', '$1');
+                  form = regexprep (form, '\[\.(\w+):', '$1:');
+                  form = regexprep (form, ':\.(\w+)\]', ':$1');
+                  rawarr{ii, jj} = form;
+                case "float"
+                  rawarr{ii, jj} = str2double (cvalue);
+                case "percentage"
+                  ## Drop the last character of the displayed text
+                  rawarr{ii, jj} = str2double (cvalue(1:end-1));
+                case "currency"
+                  rawarr{ii, jj} = str2double (getxmlattv (tcell, "office:value"));
+                case {"string", "date"}
+                  ## FIXME As dates can have so many formats they're
+                  ## returned as strings.
+                  ## "&amp;" is unescaped last, as in the formula branch, so
+                  ## that text like "&amp;quot;" is not unescaped twice
+                  cvalue = strrep (cvalue, "&quot;", '"');
+                  cvalue = strrep (cvalue, "&gt;", ">");
+                  cvalue = strrep (cvalue, "&lt;", "<");
+                  cvalue = strrep (cvalue, "&amp;", "&");
+                  rawarr{ii, jj} = cvalue;
+                case "boolean"
+                  rawarr{ii, jj} = strcmpi (cvalue, "true");
+                case "time"
+                  ## Time values usually have hours first, then minutes, optionally seconds
+                  hh = mi = ss = 0;
+                  cvalue = regexp (getxmlattv (tcell, "office:time-value"), '[0-9]*', "match");
+                  ## try-catch to catch missing seconds; fields not present
+                  ## simply keep their 0 default
+                  try
+                    hh = str2double (cvalue(1));
+                    mi = str2double (cvalue(2));
+                    ss = str2double (cvalue(3));
+                  catch
+                  end_try_catch
+                  rawarr{ii, jj} = datenum (0, 0, 0, hh, mi, ss);
+                otherwise
+                  ## Do nothing
+              endswitch
+            endif
+            ## Copy cell contents for repeated columns & bump column counter.
+            ## The copy is clamped to rcol (like the row copy below is
+            ## clamped to nrows) so rawarr cannot grow beyond its width
+            if (isfinite (repcol))
+              lastrep = min (rcol, jj + repcol - 1);
+              rawarr(ii, jj+1:lastrep) = rawarr(ii, jj);
+              jj += repcol - 1;
+            endif
+          endif
+        endwhile
+
+        ## Copy row contents to repeated rows & bump row counter
+        if (isfinite (reprow))
+          for kk=ii+1:min (nrows, ii+reprow-1)
+            rawarr(kk, :) = rawarr(ii, :);
+          endfor
+          ii += reprow - 1;
+        endif
+      endif
+    endif
+
+  endwhile
+
+  ## If required strip leftmost empty columns
+  if (lcol > 1)
+    rawarr (:, 1:ncols) = rawarr (:, lcol:rcol);
+    rawarr (:, ncols+1:end) = [];
+  endif
+
+  ## Keep track of data rectangle limits
+  xls.limits = [1, ncols; 1, nrows];
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/io/inst/private/__OCT_spsh_close__.m	Fri Sep 27 17:59:58 2013 +0000
@@ -0,0 +1,39 @@
+## Copyright (C) 2013 Philip Nienhuis
+## 
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+## 
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+## 
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*- 
+## @deftypefn {Function File} {@var{xls} =} __OCT_spsh_close__ (@var{xls})
+## Internal function! Do not call directly; close spreadsheet pointer
+## struct xls; for the native OCT interface just reset its changed flag.
+##
+## @end deftypefn
+
+## Author: Philip Nienhuis <prnienhuis@users.sf.net>
+## Created: 2013-09-09
+## Updates:
+## 2013-09-23 Added in commented-out stanza for OOXML (.xlsx)
+
+function [xls] = __OCT_spsh_close__ (xls)
+
+## Close a spreadsheet pointer struct for the OCT interface: the struct is
+## returned with its "changed" field set to 0. The commented-out if/else
+## below is scaffolding for a future ODS / OOXML distinction; at present
+## the assignment runs for every file type.
+
+## FIXME remove comments and fill OOXML clause
+#  if (strcmpi (xls.filename(end-3:end), ".ods"))
+    ## Not much to do here as files were closed in __OCT_spsh_open__
+    xls.changed = 0;
+#  else
+    ## For OOXML remove temp dir here
+#  endif
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/io/inst/private/__OCT_spsh_info__.m	Fri Sep 27 17:59:58 2013 +0000
@@ -0,0 +1,41 @@
+## Copyright (C) 2013 Philip Nienhuis
+## 
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+## 
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+## 
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*- 
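+## @deftypefn {Function File} {@var{sh_names} =} __OCT_spsh_info__ (@var{ods})
+## Internal function: return a two-column cell array holding, for each
+## sheet in spreadsheet pointer struct @var{ods}, the sheet name and its
+## occupied cell range (or "Empty").
+##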
+## @seealso{}
+## @end deftypefn
+
+## Author: Philip Nienhuis <prnienhuis at users.sf.net>
+## Created: 2013-09-10
+## Updates:
+##
+
+function [ sh_names ] = __OCT_spsh_info__ (ods)
+
+  ## Build an N-by-2 cell array describing the N sheets in spreadsheet
+  ## pointer struct ODS (ods.sheets.sh_names is read here):
+  ##   column 1  sheet name
+  ##   column 2  occupied range as "<topleft>:<bottomright>", or "Empty"
+  ##             when getusedrange reports a top row of 0
+  ##
+  ## The result is preallocated and filled with brace indexing instead of
+  ## relying on implicit growth and implicit char-to-cell conversion.
+
+  nsh = numel (ods.sheets.sh_names);
+  sh_names = cell (nsh, 2);
+  sh_names(:, 1) = ods.sheets.sh_names(:);
+  for ii=1:nsh
+    [ tr, lr, lc, rc ] = getusedrange (ods, ii);
+    if (tr)
+      sh_names{ii, 2} = sprintf ("%s:%s", calccelladdress (tr, lc),...
+                        calccelladdress (lr, rc));
+    else
+      sh_names{ii, 2} = "Empty";
+    endif
+  endfor
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/io/inst/private/__OCT_spsh_open__.m	Fri Sep 27 17:59:58 2013 +0000
@@ -0,0 +1,90 @@
+## Copyright (C) 2013 Philip Nienhuis
+## 
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+## 
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+## 
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*- 
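+## @deftypefn {Function File} {[@var{xls}, @var{xlssupport}, @var{lastintf}] =} __OCT_spsh_open__ (@var{xls}, @var{xwrite}, @var{filename}, @var{xlssupport}, @var{chk2}, @var{chk3})
+## Internal function: open spreadsheet file @var{filename} with the native
+## Octave (OCT) interface and fill spreadsheet pointer struct @var{xls}.
+##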
+## @seealso{}
+## @end deftypefn
+
+## Author: Philip Nienhuis <prnienhuis at users.sf.net>
+## File open stuff by Markus Bergholz
+## Created: 2013-09-08
+## Updates:
+## 2013-09-09 Wipe temp dir after opening as all content is in memory
+##            FIXME this needs to be adapted for future OOXML support
+## 2013-09-23 Fix copyright messages
+
+function [ xls, xlssupport, lastintf] = __OCT_spsh_open__ (xls, xwrite, filename, xlssupport, chk2, chk3)
+
+  ## Unzip spreadsheet file FILENAME into a temporary directory and, for
+  ## ODS, load content.xml into the spreadsheet pointer struct.
+  ##
+  ## Inputs:
+  ##   xls         spreadsheet pointer struct to be filled
+  ##   xwrite      not used in this function
+  ##   filename    spreadsheet file to open
+  ##   xlssupport  counter; incremented when the file was read
+  ##   chk2        true -> treat file as xlsx (not implemented yet)
+  ##   chk3        true -> treat file as ODS (checked first)
+  ## Outputs:
+  ##   xls         struct with fields workbook, sheets.sh_names,
+  ##               sheets.shtidx, xtype, app and filename set (ODS only)
+  ##   xlssupport  see above
+  ##   lastintf    "OCT"; only assigned in the ODS branch
+  ##
+  ## Raises an error if content.xml cannot be opened.
+
+  ## Open and unzip file to temp location (code by Markus Bergholz)
+  ## create current work folder
+  tmpdir = tmpnam;
+  ## Needed for a silent delete of our tmpdir. The previous setting is
+  ## remembered and put back on exit, also when an error occurs
+  old_crr = confirm_recursive_rmdir (0);
+
+  unwind_protect
+    %% http://savannah.gnu.org/bugs/index.php?39148
+    %% unpack.m taken from bugfix: http://hg.savannah.gnu.org/hgweb/octave/rev/45165d6c4738
+    %% needed for octave 3.6.x
+    unpack (filename, tmpdir, "unzip");
+
+    ## First check if we're reading ODS
+    if (chk3)
+      ## Yep. Read the actual data part in content.xml
+      fid = fopen (sprintf ("%s/content.xml", tmpdir), "r");
+      if (fid < 0)
+        ## File open error. Don't leave the unzipped files behind
+        rmdir (tmpdir, "s");
+        error ("file %s couldn't be opened for reading", filename);
+      endif
+
+      ## Read the entire file in one go, then drop a leading XML
+      ## declaration if there is one. This does not depend on how the
+      ## file is split into lines
+      xml = fread (fid, Inf, "char=>char").';
+      xml = regexprep (xml, '^\s*<\?xml[^>]*\?>\s*', '', "once");
+
+      ## File & expanded subdir are no longer needed for ODS
+      fclose (fid);
+      rmdir (tmpdir, "s");
+
+      ## To speed things up later on, get sheet names and starting indices.
+      ## One regexp pass yields both; the document length is appended to
+      ## the indices so that shtidx(k+1) marks the end of sheet k
+      [shtidx, toks] = regexp (xml, '<table:table table:name="([^"]*)"', ...
+                               "start", "tokens");
+      nsheets = numel (shtidx);
+      shtidx = [ shtidx length(xml) ];
+      ## Get sheet names
+      sh_names = cell (1, nsheets);
+      for ii=1:nsheets
+        sh_names{ii} = toks{ii}{1};
+      endfor
+
+      ## Fill ods pointer.
+      ## FIXME find a class that doesn't display as one looooong string
+      xls.workbook = xml;               # content.xml
+      xls.sheets.sh_names = sh_names;   # sheet names
+      xls.sheets.shtidx = shtidx;       # start & end indices of sheets
+      xls.xtype = "OCT";                # OCT is fall-back interface
+      xls.app = ' ';                    # location (subdir) of unzipped file for OOXML
+                                        # must NOT be an empty string!
+      xls.filename = filename;          # spreadsheet filename
+
+      lastintf = "OCT";
+      xlssupport += 1;
+
+    elseif (chk2)
+      ## xlsx
+      ## FIXME  not implemented yet - Markus' job
+      ## NOTE(review): tmpdir is left in place and lastintf stays
+      ## unassigned on this path
+
+    endif
+
+  unwind_protect_cleanup
+    confirm_recursive_rmdir (old_crr);
+  end_unwind_protect
+
+endfunction