Mercurial > forge
changeset 12063:c93f5089bda2 octave-forge
New files for experimental OCT (native Octave) interface
author | prnienhuis |
---|---|
date | Fri, 27 Sep 2013 17:59:58 +0000 |
parents | 8e1f276b1202 |
children | 07169c2596f2 |
files | main/io/inst/private/__OCT_getusedrange__.m main/io/inst/private/__OCT_ods2oct__.m main/io/inst/private/__OCT_spsh_close__.m main/io/inst/private/__OCT_spsh_info__.m main/io/inst/private/__OCT_spsh_open__.m |
diffstat | 5 files changed, 603 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
## Copyright (C) 2013 Philip Nienhuis
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{trow}, @var{brow}, @var{lcol}, @var{rcol}] =} __OCT_getusedrange__ (@var{spptr}, @var{ii})
## Get leftmost & rightmost occupied column numbers, and topmost and
## lowermost occupied row numbers (base 1) of sheet number @var{ii} in the
## spreadsheet pointer struct @var{spptr}.
##
## All four outputs are 0 if the sheet contains no data.
##
## @seealso{}
## @end deftypefn

## Author: Philip Nienhuis <prnienhuis at users.sf.net>
## Created: 2013-09-08
## Updates:
## 2013-09-23 Prepared for adding in OOXML
## 2013-09-26 Improved code to skip last empty columns in column count
##
## File: main/io/inst/private/__OCT_getusedrange__.m

function [ trow, brow, lcol, rcol ] = __OCT_getusedrange__ (spptr, ii)

  ## Dispatch on the file extension.  fileparts is used rather than
  ## filename(end-3:end) so that very short file names do not raise an
  ## index error.
  [~, ~, fext] = fileparts (spptr.filename);
  if (strcmpi (fext, ".ods"))
    [ trow, brow, lcol, rcol ] = __OCT_ods_getusedrange__ (spptr, ii);
  else
    [ trow, brow, lcol, rcol ] = __OCT_xlsx_getusedrange__ (spptr, ii);
  endif

endfunction


##=============================OOXML========================
## Placeholder for .xlsx support; currently always reports an empty sheet.
function [ trow, brow, lcol, rcol ] = __OCT_xlsx_getusedrange__ (spptr, ii)

  trow = brow = lcol = rcol = 0;

  ## FIXME OOXML stuff (.xlsx) here

endfunction


##==============================ODS=========================
## Scan the raw content.xml text of ODS sheet ii for the occupied range.
function [ trow, brow, lcol, rcol ] = __OCT_ods_getusedrange__ (spptr, ii)

  trow = brow = lcol = rcol = 0;

  ## Check input (both bounds; an index < 1 used to fall through to an
  ## obscure indexing error)
  nsheets = numel (spptr.sheets.sh_names);
  if (ii < 1 || ii > nsheets)
    error ("getusedrange: sheet index (%d) out of range (1 - %d)", ii, nsheets);
  endif

  ## Get requested sheet
  sheet = spptr.workbook(spptr.sheets.shtidx(ii):spptr.sheets.shtidx(ii+1)-1);

  ## Check if sheet contains any cell content at all
  ## FIXME: in far-fetched cases, cell string content may contain ' office:' too
  if (! index (sheet, " office:"))
    return
  endif

  ## Assess number of spreadsheet rows out of table-rows.  The sheet length
  ## is appended as a sentinel so rowidx(k+1) always exists.
  rowidx = [strfind(sheet, "<table:table-row") length(sheet)];
  nrows1 = nrows = numel (rowidx) - 1;
  ## Get and count repeated table-row occurrences
  reprows = strfind (sheet, "table:number-rows-repeated");
  ## Repeat counts of empty rows are parked here until a later row with data
  ## shows that they are not merely trailing empty rows
  repcnt1 = 0;
  ## Find which rows contain rep counts.  A separate loop variable is used so
  ## the sheet index ii is not overwritten.
  for kk = 1:numel (reprows)
    irow = find ((reprows(kk) > rowidx))(end);
    tblrow = sheet(rowidx(irow):rowidx(irow+1));
    repcnt = str2double (getxmlattv (tblrow, "table:number-rows-repeated")) - 1;
    ## Add repcount to temporary counter
    repcnt1 += repcnt;
    ## Check if this row contains any data
    if (index (tblrow, "office:"))
      ## If yes, add repcounter to nrows and reset it
      nrows += repcnt1;
      repcnt1 = 0;
    else
      ## If no, check if there's data in the next table-row
      tblrow = getxmlnode (sheet(rowidx(irow+1):end), "table:table-row");
      if (! isempty (tblrow))
        if (index (tblrow, "office:"))
          ## If yes, add repcounter to nrows and reset it
          nrows += repcnt1;
          repcnt1 = 0;
        endif
      endif
    endif
  endfor

  ## Set spreadsheet upper data row and count columns
  ncol = 0;
  re = 1;
  for jj = 1:nrows1
    ## Get a table row
    [trow1, ~, re] = getxmlnode (sheet, "table:table-row", re);

    if (! ncol)
      ## Find top row index.  Only for first table-row, check if empty
      if (index (trow1, "office:"))
        trow = 1;
      else
        ## Apparently a placeholder table-row
        repcnt = str2double (getxmlattv (trow1, "table:number-rows-repeated"));
        if (isfinite (repcnt))
          ## First row with data is below this table row
          trow = repcnt + 1;
        else
          ## Upper table row is a single row
          trow = 2;
        endif
      endif

      ## Explore number of columns in row (should match that of entire sheet)
      ## Older OOo versions fill the entire width with nr-cols-repeated attrib.
      ## emptycols accumulates empty columns ACROSS cells and is only added to
      ## ncol once a later cell with data is met, so trailing empty columns
      ## are left out of the count.
      tcell = " ";
      tcidx = 1;
      emptycols = 0;
      repcol = NaN;
      while (! isempty (tcell))
        ## Try to get next table-cell
        [tcell, ~, tcidx] = getxmlnode (trow1, "table:table-cell", tcidx);
        if (! isempty (tcell))
          repcolatt = getxmlattv (tcell, "table:number-columns-repeated");
          ## str2double yields NaN when the attribute is absent
          repcol = str2double (repcolatt);
          if (isfinite (repcol))
            ## Cell spans repcol columns
            emptycols += repcol;
            ## Check if cell has data
            if (index (tcell, " office:"))
              ncol += emptycols;
              emptycols = 0;
            endif
          else
            ## Single (non-repeated) cell
            if (index (tcell, " office:"))
              ncol += 1 + emptycols;
              emptycols = 0;
            else
              emptycols++;
            endif
          endif
        endif
      endwhile
      ## NOTE(review): lcol is seeded with the column count and only lowered
      ## by later rows; for a single-row sheet this looks too large - confirm
      ## against callers (ods2oct carries a matching FIXME).
      if (ncol)
        lcol = ncol;
      elseif (isfinite (repcol))
        lcol = ncol = repcol;
      else
        ## No usable repeat count: fall back on the counted empty columns so
        ## that NaN never ends up in ncol (NaN in a condition is an error)
        lcol = ncol = emptycols;
      endif

    else
      ## For subsequent rows, just check if 1st & last tcell contain repcols
      ## On older OOo versions an empty top row may have 1024 cols, and ncol = 0
      ## Get indices of all table-cells
      tcidx = regexp (trow1, '<table:table-cell', "start");
      ## Leftmost table-cell
      tcell = getxmlnode (trow1, "table:table-cell");
      repcol = str2double (getxmlattv (tcell, "table:number-columns-repeated"));
      if (index (tcell, " office:"))
        ## Yes. Leftmost table-cell contains data
        lcol = 1;
      else
        if (isfinite (repcol))
          lcol = min (lcol, repcol + 1);
        else
          lcol = 2;
        endif
      endif
      ## Rightmost table-cell, if row contains more than one tcell
      if (numel (tcidx) > 1)
        rc = str2double (getxmlattv (trow1(tcidx(end):end), "table:number-columns-repeated"));
        if (! isfinite (rc))
          ## Check for data content
          ## NOTE(review): min() can only shrink ncol here; verify that max()
          ## was not intended.
          if (index (trow1(tcidx(end):end), " office:value"))
            ncol = min (ncol, numel (tcidx));
          endif
        endif
      endif
    endif

    ## Check if last table-row contains any data
    if (jj == nrows1)
      if (! index (trow1, " office:"))
        nrows -= 1;
      endif
    endif

  endfor

  if (ncol > 0)
    rcol = ncol;
    brow = nrows;
  endif

endfunction
## Copyright (C) 2013 Philip Nienhuis
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{rawarr}, @var{xls}, @var{rstatus}] =} __OCT_ods2oct__ (@var{xls}, @var{wsh}, @var{cellrange}, @var{spsh_opts})
## Internal function: read raw cell contents of worksheet @var{wsh} (sheet
## number or sheet name) from the ODS pointer struct @var{xls} into cell
## array @var{rawarr}, optionally limited to @var{cellrange}.
##
## If @var{spsh_opts}.formulas_as_text is true, formula text rather than
## formula results is returned.  On return @var{xls}.limits holds the
## limits of the data rectangle.
##
## @seealso{}
## @end deftypefn

## Author: Philip Nienhuis <prnienhuis at users.sf.net>
## Created: 2013-09-08
## Updates:
## 2013-09-09 Fix getxmlnode call
##     ''     boolean, percentage & currency tags properly handled
## 2013-09-11 Properly skip empty upper table-rows
##     ''     Return dates as strings to avoid misinterpreting plethora of formats
##     ''     Try-catch construct for time values
##     ''     Formula reading support
## 2013-09-23 Renamed to __OCT_ods2oct__.m
##
## File: main/io/inst/private/__OCT_ods2oct__.m

function [ rawarr, xls, rstatus] = __OCT_ods2oct__ (xls, wsh, cellrange='', spsh_opts)

  ## NOTE(review): rstatus is only set to 1 in the empty-sheet branch below
  ## and stays 0 after a normal read - confirm against what callers expect.
  rstatus = 0;

  ## Tolerate a missing / incomplete options struct
  if (nargin < 4 || ! isstruct (spsh_opts) || ! isfield (spsh_opts, "formulas_as_text"))
    spsh_opts.formulas_as_text = 0;
  endif

  ## Check if requested worksheet exists in the file & if so, get sheet
  if (isnumeric (wsh))
    if (wsh > numel (xls.sheets.sh_names) || wsh < 1)
      error ("ods2oct: sheet number (%d) out of range (1 - %d)", wsh, numel (xls.sheets.sh_names));
    endif
  elseif (ischar (wsh))
    ## Prefer an exact name match; fall back on the former prefix match
    idx = find (strcmp (wsh, xls.sheets.sh_names), 1);
    if (isempty (idx))
      idx = strmatch (wsh, xls.sheets.sh_names);
    endif
    if (isempty (idx))
      error ("ods2oct: sheet '%s' not found in file %s", wsh, xls.filename);
    endif
    ## Several sheets may share the prefix; take the first one
    wsh = idx(1);
  endif
  sheet = xls.workbook(xls.sheets.shtidx(wsh):xls.sheets.shtidx(wsh+1));

  ## Check ranges
  [ firstrow, lastrow, lcol, rcol ] = getusedrange (xls, wsh);
  ## FIXME first row & left col always 1
  if (isempty (cellrange))
    if (firstrow == 0 && lastrow == 0)
      ## Empty sheet
      rawarr = {};
      printf ("Worksheet '%s' contains no data\n", xls.sheets.sh_names{wsh});
      rstatus = 1;
      return;
    else
      nrows = lastrow - firstrow + 1;
      ncols = rcol - lcol + 1;
    endif
  else
    [topleft, nrows, ncols, firstrow, lcol] = parse_sp_range (cellrange);
    ## Check if requested range exists
    lastrow = min (lastrow, firstrow + nrows - 1);
    rcol = min (rcol, lcol + ncols - 1);
  endif

  rawarr = cell (nrows, rcol);

  ## Get data
  re = 1;
  ii = 0;
  trow = " ";
  ## Row index ii below does not necessarily match table-rows!
  while (ii < lastrow && (! isempty (trow)))
    ## Get next table-row
    [trow, ~, re] = getxmlnode (sheet, "table:table-row", re);

    if (! isempty (trow))
      ## Check if table-row has any data
      datrow = index (trow, " office:");

      ## Only process table-row contents if it has any data.  Skip upper
      ## empty table-rows (that's why we need an OR), only start counting
      ## with the first table-row containing data
      if (datrow || ii)
        ++ii;
        ## Check repeat status (NaN if the attribute is absent)
        reprow = str2double (getxmlattv (trow, "table:number-rows-repeated"));
        ce = 0;
        jj = 0;
        tcell = " ";
        ## Column index jj below does not necessarily match table-cells!
        while (jj < rcol && (! isempty (tcell)))
          ++jj;

          ## Get next table-cell.  First see if it is covered.
          [tcell1, ~, ce1] = getxmlnode (trow, "table:covered-table-cell", ce+1);
          [tcell2, ~, ce2] = getxmlnode (trow, "table:table-cell", ce+1);
          if (ce1 > 0 && ce2 > 0)
            ## Both table-cell and a table-covered-cell are present
            if (ce1 < ce2)
              ## table-covered cell before table-cell.  Set pointer at its end
              ce = ce1;
              tcell = tcell1;
              ## Signal code below that content parsing must be skipped
              ce2 = 0;
            else
              ## table-cell before table-covered cell.  Pointer to end of table-cell
              ce = ce2;
              tcell = tcell2;
            endif
          else
            if (ce1 > 0)
              ## Only table-covered-cell found
              ce = ce1;
              tcell = tcell1;
            else
              ## Only table-cell found
              ce = ce2;
              tcell = tcell2;
            endif
          endif

          if (! isempty (tcell))
            ## First check its repeat status
            repcol = str2double (getxmlattv (tcell, "table:number-columns-repeated"));
            ## Try to get value type (covered cells have none)
            ctype = '';
            if (ce2)
              ctype = getxmlattv (tcell, "office:value-type");
            endif
            if (! isempty (ctype))
              if (spsh_opts.formulas_as_text)
                form = getxmlattv (tcell, "table:formula");
                if (! isempty (form))
                  ctype = "cformula";
                endif
              endif
              ## Get value: text between <text:p> and </text:p>
              cvalue = getxmlnode (tcell, "text:p")(9:end-9);
              ## Put proper translation into rawarr
              switch ctype
                case "cformula"
                  ## Drop the leading 3 characters of the formula attribute,
                  ## then decode XML entities.  "&amp;" must be decoded last
                  ## to avoid double-decoding text like "&amp;lt;".
                  form = strrep (form(4:end), "&quot;", '"');
                  form = strrep (form, "&lt;", "<");
                  form = strrep (form, "&gt;", ">");
                  form = strrep (form, "&amp;", "&");
                  ## Pimp ranges in formulas
                  form = regexprep (form, '\[\.(\w+)\]', '$1');
                  form = regexprep (form, '\[\.(\w+):', '$1:');
                  form = regexprep (form, ':\.(\w+)\]', ':$1');
                  rawarr{ii, jj} = form;
                case "float"
                  rawarr{ii, jj} = str2double (cvalue);
                case "percentage"
                  ## Strip the trailing character before conversion
                  rawarr{ii, jj} = str2double (cvalue(1:end-1));
                case "currency"
                  rawarr{ii, jj} = str2double (getxmlattv (tcell, "office:value"));
                case {"string", "date"}
                  ## FIXME As dates can have so many formats they're returned
                  ##       as strings.
                  ## Decode XML entities, "&amp;" last (see above)
                  cvalue = strrep (cvalue, "&quot;", '"');
                  cvalue = strrep (cvalue, "&gt;", ">");
                  cvalue = strrep (cvalue, "&lt;", "<");
                  cvalue = strrep (cvalue, "&amp;", "&");
                  ## Brace assignment also stores empty strings unambiguously
                  rawarr{ii, jj} = cvalue;
                case "boolean"
                  rawarr{ii, jj} = strcmpi (cvalue, "true");
                case "time"
                  ## Time values usually have hours first, then minutes,
                  ## optionally seconds; missing parts default to 0
                  tparts = regexp (getxmlattv (tcell, "office:time-value"), '[0-9]*', "match");
                  hms = [0, 0, 0];
                  for kk = 1:min (3, numel (tparts))
                    hms(kk) = str2double (tparts{kk});
                  endfor
                  rawarr{ii, jj} = datenum (0, 0, 0, hms(1), hms(2), hms(3));
                otherwise
                  ## Do nothing
              endswitch
            endif
            ## Copy cell contents for repeated columns & bump column counter.
            ## The copy is clamped to rcol so that wide runs of repeated
            ## (empty) cells do not grow rawarr beyond the used range.
            if (isfinite (repcol))
              rawarr(ii, jj+1:min (rcol, jj+repcol-1)) = rawarr(ii, jj);
              jj += repcol - 1;
            endif
          endif
        endwhile

        ## Copy row contents to repeated rows & bump row counter
        if (isfinite (reprow))
          for kk = ii+1:min (nrows, ii+reprow-1)
            rawarr(kk, :) = rawarr(ii, :);
          endfor
          ii += reprow - 1;
        endif
      endif
    endif

  endwhile

  ## If required strip leftmost empty columns
  if (lcol > 1)
    rawarr (:, 1:ncols) = rawarr (:, lcol:rcol);
    rawarr (:, ncols+1:end) = [];
  endif

  ## Keep track of data rectangle limits
  xls.limits = [1, ncols; 1, nrows];

endfunction
## Copyright (C) 2013 Philip Nienhuis
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{xls} =} __OCT_spsh_close__ (@var{xls})
## Internal function!  Do not call directly.  Close spreadsheet pointer
## struct @var{xls}; for the native OCT interface this merely resets the
## @code{changed} field to 0.
##
## @end deftypefn

## Author: Philip Nienhuis <prnienhuis@users.sf.net>
## Created: 2013-09-09
## Updates:
## 2013-09-23 Added in commented-out stanza for OOXML (.xlsx)
##
## File: main/io/inst/private/__OCT_spsh_close__.m

function [xls] = __OCT_spsh_close__ (xls)

  ## FIXME: when OOXML (.xlsx) support is filled in, branch on the file
  ##        extension here:
  ##          if (strcmpi (xls.filename(end-3:end), ".ods"))
  ##            <reset changed flag, as below>
  ##          else
  ##            <for OOXML remove temp dir here>
  ##          endif

  ## Not much to do here as files were closed in __OCT_spsh_open__
  xls.changed = 0;

endfunction
## Copyright (C) 2013 Philip Nienhuis
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{sh_names} =} __OCT_spsh_info__ (@var{ods})
## Internal function: return an Nx2 cell array with, for each of the N sheets
## in spreadsheet pointer struct @var{ods}, the sheet name (column 1) and its
## occupied cell range as an address string, or "Empty" (column 2).
##
## @seealso{}
## @end deftypefn

## Author: Philip Nienhuis <prnienhuis at users.sf.net>
## Created: 2013-09-10
## Updates:
##
## File: main/io/inst/private/__OCT_spsh_info__.m

function [ sh_names ] = __OCT_spsh_info__ (ods)

  ## Preallocate so the result is always N x 2, also for N = 0
  nsheets = numel (ods.sheets.sh_names);
  sh_names = cell (nsheets, 2);

  for ii = 1:nsheets
    sh_names{ii, 1} = ods.sheets.sh_names{ii};
    [ tr, lr, lc, rc ] = getusedrange (ods, ii);
    if (tr)
      ## A nonzero top row means the sheet holds data
      sh_names{ii, 2} = sprintf ("%s:%s", calccelladdress (tr, lc), ...
                                 calccelladdress (lr, rc));
    else
      sh_names{ii, 2} = "Empty";
    endif
  endfor

endfunction
## Copyright (C) 2013 Philip Nienhuis
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{xls}, @var{xlssupport}, @var{lastintf}] =} __OCT_spsh_open__ (@var{xls}, @var{xwrite}, @var{filename}, @var{xlssupport}, @var{chk2}, @var{chk3})
## Internal function: unzip spreadsheet file @var{filename} to a temporary
## directory and, if @var{chk3} is true (ODS), read content.xml into
## spreadsheet pointer struct @var{xls} together with the sheet names and
## their start indices.  On success @var{xlssupport} is incremented and
## @var{lastintf} is set to "OCT".
##
## The branch for @var{chk2} (xlsx) is not implemented yet.
##
## @seealso{}
## @end deftypefn

## Author: Philip Nienhuis <prnienhuis at users.sf.net>
## File open stuff by Markus Bergholz
## Created: 2013-09-08
## Updates:
## 2013-09-09 Wipe temp dir after opening as all content is in memory
##            FIXME this needs to be adapted for future OOXML support
## 2013-09-23 Fix copyright messages
##
## File: main/io/inst/private/__OCT_spsh_open__.m

function [ xls, xlssupport, lastintf] = __OCT_spsh_open__ (xls, xwrite, filename, xlssupport, chk2, chk3)

  ## NOTE(review): lastintf is only assigned in the ODS branch; a caller that
  ## requests it for any other file type gets an undefined-output error.

  ## Open and unzip file to temp location (code by Markus Bergholz)
  ## create current work folder
  tmpdir = tempname ();
  confirm_recursive_rmdir (0);    # this is needed for a silent delete of our tmpdir

  %% http://savannah.gnu.org/bugs/index.php?39148
  %% unpack.m taken from bugfix: http://hg.savannah.gnu.org/hgweb/octave/rev/45165d6c4738
  %% needed for octave 3.6.x
  unpack (filename, tmpdir, "unzip");

  ## First check if we're reading ODS
  if (chk3)
    ## Yep.  Read the actual data part in content.xml.  The unwind_protect
    ## blocks make sure the file handle and the temp dir are released even
    ## when opening or reading fails.
    unwind_protect
      fid = fopen (fullfile (tmpdir, "content.xml"), "r");
      if (fid < 0)
        ## File open error
        error ("file %s couldn't be opened for reading", filename);
      endif
      unwind_protect
        ## content.xml normally consists of an XML declaration line followed
        ## by the document proper: read the first line separately, then
        ## everything after it (not just one more line).
        first = fgets (fid);
        rest = fread (fid, Inf, "*char").';
      unwind_protect_cleanup
        fclose (fid);
      end_unwind_protect
    unwind_protect_cleanup
      ## File & expanded subdir are no longer needed for ODS.  Outputs are
      ## requested so that a failing cleanup cannot mask an earlier error.
      [rmstat, rmmsg] = rmdir (tmpdir, "s");
    end_unwind_protect

    if (! ischar (first))
      ## fgets returns -1 on an empty file
      error ("file %s: content.xml is empty", filename);
    elseif (isempty (rest))
      ## Single-line file: the one line is all there is
      xml = first;
    else
      ## Skip the first (declaration) line, as before
      xml = rest;
    endif

    ## To speed things up later on, get sheet names and starting indices.
    ## The length of xml is appended as end marker of the last sheet.
    shtidx = strfind (xml, "<table:table table:name=");
    shtidx = [ shtidx length(xml) ];
    nsheets = numel (shtidx) - 1;
    ## Get sheet names
    sh_names = cell (1, nsheets);
    for ii = 1:nsheets
      ## The search string is 24 characters long, followed by the opening
      ## double quote; the name runs up to the next double quote
      nstart = shtidx(ii) + 25;
      nlen = index (xml(nstart:end), '"') - 1;
      sh_names{ii} = xml(nstart : nstart + nlen - 1);
    endfor

    ## Fill ods pointer.
    ## FIXME find a class that doesn't display as one looooong string
    xls.workbook        = xml;        # content.xml
    xls.sheets.sh_names = sh_names;   # sheet names
    xls.sheets.shtidx   = shtidx;     # start & end indices of sheets
    xls.xtype           = "OCT";      # OCT is fall-back interface
    xls.app             = ' ';        # location (subdir) of unzipped file for OOXML
                                      # must NOT be an empty string!
    xls.filename        = filename;   # spreadsheet filename

    lastintf = "OCT";
    xlssupport += 1;

  elseif (chk2)
    ## xlsx
    ## FIXME not implemented yet - Markus' job
    ## NOTE(review): the unzipped temp dir is left in place on this path.

  endif

endfunction