changeset 6892:2121942fe8a4 octave-forge

More code cleanup & bug fixes. Improved memory usage and speed aspects.
author prnienhuis
date Fri, 19 Mar 2010 15:10:25 +0000
parents 3f4f89ead171
children f85fd631049f
files main/io/inst/ods2oct.m
diffstat 1 files changed, 38 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/main/io/inst/ods2oct.m	Fri Mar 19 09:39:47 2010 +0000
+++ b/main/io/inst/ods2oct.m	Fri Mar 19 15:10:25 2010 +0000
@@ -87,14 +87,26 @@
 
 ## Author: Philip Nienhuis
 ## Created: 2009-12-13
-## Latest update of ods2oct: 2009-12-30
-## Latest update of functions below: 2010-01-08
+## Updates:
+## 2009-12-30 First working version
+## 2010-03-19 Added check for odfdom version (should be 0.7.5 until further notice)
+##
+## Latest update of subfunctions below: 2010-03-19
 
 function [ rawarr, ods, rstatus ] = ods2oct (ods, wsh=1, datrange=[])
 
 	if (strcmp (ods.xtype, 'OTK'))
 		# Read ods file tru Java & ODF toolkit
-		[rawarr, ods, rstatus] = ods2jotk2oct (ods, wsh, datrange);
+		# Get odf toolkit .jar version. Versions 0.7.5. & 0.8 have widely different API
+		versn = java_invoke ('org.odftoolkit.odfdom.Version', 'getApplicationVersion');
+		if (strcmp (versn, '0.7.5'))
+			[rawarr, ods, rstatus] = ods2jotk2oct (ods, wsh, datrange);
+		elseif (strcmp (versn, '0.8'))
+			error ('odfdom version 0.8 not implemented yet - use version 0.7.5');
+#			[rawarr, ods, rstatus] = ods3jotk2oct (ods, wsh, datrange);
+		else
+			error ('Wrong ODF Toolkit version - only odfdom 0.7.5 or 0.8 supported');
+		endif
 		
 	elseif (strcmp (ods.xtype, 'JOD'))
 		[rawarr, ods, rstatus] = ods2jod2oct (ods, wsh, datrange);
@@ -139,6 +151,7 @@
 ##      "     Fixed reference to upper row in case of nr-rows-repeated top tablerow
 ##      "     Tamed down memory usage for rawarr when desired data range is given
 ##      "     Added call to getusedrange() for cases when o range was specified
+## 2010-03-19 More code cleanup & fixes for bugs introduced 18/3/2010 8-()
 
 function [ rawarr, ods, rstatus ] = ods2jotk2oct (ods, wsh=1, crange = [])
 
@@ -152,7 +165,7 @@
 								# makes physical copies only when needed (?)
 	xpath = ods.app.getXPath;
 	
-	# AFAICS ODS spreadsheets have the following hierarchy:
+	# AFAICS ODS spreadsheets have the following hierarchy (after Xpath processing):
 	# <table:table> - table nodes, the actual worksheets;
 	# <table:table-row> - row nodes, the rows in a worksheet;
 	# <table:table-cell> - cell nodes, the cells in a row;
@@ -177,7 +190,9 @@
 				wsh = ii;
 			endif
 		endwhile
-		if (ischar (wsh)) error (sprintf ("No worksheet '%s' found in file %s", wsh, ods.filename)); endif
+		if (ischar (wsh))
+			error (sprintf ("No worksheet '%s' found in file %s", wsh, ods.filename));
+		endif
 	elseif (wsh > nr_of_sheets || wsh < 1)
 		# We already have a numeric sheet pointer. If it's not in range:
 		error (sprintf ("Worksheet no. %d out of range (1 - %d)", wsh, nr_of_sheets));
@@ -197,20 +212,22 @@
 		[dummy, nrows, ncols, trow, lcol] = parse_sp_range (crange);
 		brow = min (trow + nrows - 1, nr_of_rows);
 		# Check ODS column limits
-		if (lcol > 1024 || trow > 65536) error ("ods2oct: invalid range; max 1024 columns & 65536 rows."); endif
+		if (lcol > 1024 || trow > 65536) 
+			error ("ods2oct: invalid range; max 1024 columns & 65536 rows."); 
+		endif
 		# Truncate range silently if needed
 		rcol = min (lcol + ncols - 1, 1024);
 		ncols = min (ncols, 1024 - lcol + 1);
 		nrows = min (nrows, 65536 - trow + 1);
 	endif
-	# Create storage for data content. We can't know max row length yet so expect the worst
-	rawarr = cell (brow, rcol);
+	# Create storage for data content
+	rawarr = cell (nrows, ncols);
 
 	# Prepare reading sheet row by row
 	rightmcol = 0;		# Used to find actual rightmost column
 	ii = trow - 1;		# Spreadsheet row counter
 	rowcnt = 0;
-	# Find requested uppermost requested *tablerow*. It may be influenced by nr-rows-repeated
+	# Find uppermost requested *tablerow*. It may be influenced by nr-rows-repeated
 	if (ii >= 1)
 		tfillrows = 0;
 		while (tfillrows < ii)
@@ -229,9 +246,9 @@
 		nr_of_cells = min (row.getLength (), rcol);
 		rightmcol = max (rightmcol, nr_of_cells);	# Keep track of max row length
 		# Read column (cell, "table-cell" in ODS speak) by column
-		jj = lcol; r_cols = 0;
-		while (r_cols <= 1024 && jj <= rcol)
-			tcell = row.getCellAt(jj-1); ++r_cols;
+		jj = lcol; 
+		while (jj <= rcol)
+			tcell = row.getCellAt(jj-1);
 			if (~isempty (tcell)) 		# If empty it's possibly in columns-repeated/spanned
 				if ~(index (char(tcell), 'text:p>Err:') || index (char(tcell), 'text:p>#DIV'))	
 					# Get data from cell
@@ -239,7 +256,7 @@
 					cvalue = tcell.getOfficeValueAttribute ();
 					switch deblank (ctype)
 						case  {'float', 'currency', 'percentage'}
-							rawarr(ii, jj) = cvalue;
+							rawarr(ii-trow+2, jj-lcol+1) = cvalue;
 						case 'date'
 							cvalue = tcell.getOfficeDateValueAttribute ();
 							# Dates are returned as octave datenums, i.e. 0-0-0000 based
@@ -250,9 +267,9 @@
 								hh = str2num (cvalue(12:13));
 								mm = str2num (cvalue(15:16));
 								ss = str2num (cvalue(18:19));
-								rawarr(ii, jj) = datenum (yr, mo, dy, hh, mm, ss);
+								rawarr(ii-trow+2, jj-lcol+1) = datenum (yr, mo, dy, hh, mm, ss);
 							else
-								rawarr(ii, jj) = datenum (yr, mo, dy);
+								rawarr(ii-trow+2, jj-lcol+1) = datenum (yr, mo, dy);
 							endif
 						case 'time'
 							cvalue = tcell.getOfficeTimeValueAttribute ();
@@ -260,14 +277,14 @@
 								hh = str2num (cvalue(3:4));
 								mm = str2num (cvalue(6:7));
 								ss = str2num (cvalue(9:10));
-								rawarr(ii, jj) = datenum (0, 0, 0, hh, mm, ss);
+								rawarr(ii-trow+2, jj-lcol+1) = datenum (0, 0, 0, hh, mm, ss);
 							endif
 						case 'boolean'
 							cvalue = tcell.getOfficeBooleanValueAttribute ();
-							rawarr(ii, jj) = cvalue; 
+							rawarr(ii-trow+2, jj-lcol+1) = cvalue; 
 						case 'string'
 							cvalue = tcell.getOfficeStringValueAttribute ();
-							if (isempty (cvalue))
+							if (isempty (cvalue))     # Happens with e.g., hyperlinks
 								tmp = char (tcell);
 								# Hack string value from between <text:p|r> </text:p|r> tags
 								ist = findstr (tmp, '<text:');
@@ -279,14 +296,11 @@
 									cvalue = tmp(ist:ien);
 								endif
 							endif
-							rawarr(ii, jj) = cvalue;
+							rawarr(ii-trow+2, jj-lcol+1)= cvalue;
 						otherwise
 							# Nothing
 					endswitch
 				endif
-				# Check for repeated columns (often empty columns, viz. to right of data)
-				# and add to column count
-				r_cols = r_cols + tcell.getTableNumberColumnsRepeatedAttribute () - 1;
 			endif
 			++jj;						# Next cell
 		endwhile
@@ -297,25 +311,10 @@
 			# Expand rawarr cf. table-row
 			nr_of_rows = nr_of_rows + extrarows;
 			ii = ii + extrarows;
-			if (isempty (crange))
-				'increased'
-				nrows = min (65536, nrows + extrarows);
-				brow = min (trow + nrows - 1, nr_of_rows);
-				# Increase return argument size if needed
-				tmp = cell (extrarows, rcol);
-				rawarr = [rawarr; tmp];
-				# Copy repeated row contents over
-				for kk = ii+1:ii+extrarows
-					rawarr (kk, :) = rawarr (ii, :);
-				endfor
-			endif
 		endif
 		++ii;
 	endwhile
 
-	# Pre-crop rawarr from right (max was 1024) and bottom
-	rawarr = rawarr (1:brow, 1:rightmcol);
-
 	# Crop rawarr from all empty outer rows & columns just like Excel does
 	# & keep track of limits
 	emptr = cellfun('isempty', rawarr);
@@ -325,11 +324,11 @@
 	else
 		irowt = 1;
 		while (all (emptr(irowt, :))), irowt++; endwhile
-		irowb = brow;
+		irowb = nrows;
 		while (all (emptr(irowb, :))), irowb--; endwhile
 		icoll = 1;
 		while (all (emptr(:, icoll))), icoll++; endwhile
-		icolr = rightmcol;
+		icolr = ncols;
 		while (all (emptr(:, icolr))), icolr--; endwhile
 		# Crop textarray
 		rawarr = rawarr(irowt:irowb, icoll:icolr);