changeset 14799:21197c43f984

Better Matlab compatibility of textscan, bugs #36356 and #36392. * textscan.m: ensure trailing newline in data string before calling strread in case of CollectOutput parameter; test added for uneven data column lengths + CollectOutput; return cell array of proper dimension when no data were read; rewrote parts of texinfo help text. * strread.m: add explanation of role of trailing newline to texinfo help text.
author Philip Nienhuis <prnienhuis@users.sf.net>
date Fri, 22 Jun 2012 19:09:30 +0200
parents fce841c941a5
children a922f768ee09
files scripts/io/strread.m scripts/io/textscan.m
diffstat 2 files changed, 67 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/io/strread.m	Thu Jun 21 18:25:23 2012 -0400
+++ b/scripts/io/strread.m	Fri Jun 22 19:09:30 2012 +0200
@@ -154,7 +154,21 @@
 ##
 ## @end table
 ##
-## @seealso{textscan, textread, load, dlmread, fscanf}
+## When the number of words in @var{str} is not an exact multiple of the
+## number of format conversion specifiers, the behavior of @code{strread}
+## depends on the last character of @var{str}:
+##
+## @table @asis
+## @item last character = "\n"
+## Data columns are padded with empty fields or NaN so that all columns
+## have equal length.
+##
+## @item last character is not "\n"
+## Data columns are not padded; strread returns columns of unequal length.
+##
+## @end table
+##
+## @seealso{textscan, textread, load, dlmread, fscanf}
 ## @end deftypefn
 
 function varargout = strread (str, format = "%f", varargin)
--- a/scripts/io/textscan.m	Thu Jun 21 18:25:23 2012 -0400
+++ b/scripts/io/textscan.m	Fri Jun 22 19:09:30 2012 +0200
@@ -25,13 +25,13 @@
 ## @deftypefnx {Function File} {[@var{C}, @var{position}] =} textscan (@var{fid}, @dots{})
 ## Read data from a text file or string.
 ##
-## The file associated with @var{fid} is read and parsed according to
-## @var{format}.  The function behaves like @code{strread} except it works by
-## parsing a file instead of a string.  See the documentation of
-## @code{strread} for details.
+## The string @var{str} or file associated with @var{fid} is read from and
+## parsed according to @var{format}.  The function behaves like @code{strread}
+## except it can also read from a file instead of a string.  See the
+## documentation of @code{strread} for details.
 ##
-## In addition to the options supported by
-## @code{strread}, this function supports a few more:
+## In addition to the options supported by @code{strread}, this function
+## supports a few more:
 ##
 ## @itemize
 ## @item "collectoutput":
@@ -50,16 +50,19 @@
 ## @item "returnonerror":
 ## If set to numerical 1 or true (default), return normally when read errors
 ## have been encountered.  If set to 0 or false, return an error and no data.
+## As the string or file is read by columns rather than by rows, and because
+## textscan is fairly forgiving as regards read errors, setting this option
+## may have little or no actual effect.
 ## @end itemize
 ##
 ## When reading from a character string, optional input argument @var{n}
 ## specifies the number of times @var{format} should be used (i.e., to limit
 ## the amount of data read).
-## When reading fro file, @var{n} specifies the number of data lines to read;
+## When reading from file, @var{n} specifies the number of data lines to read;
 ## in this sense it differs slightly from the format repeat count in strread.
 ##
-## The output @var{C} is a cell array whose length is given by the number
-## of format specifiers.
+## The output @var{C} is a cell array whose second dimension is determined
+## by the number of format specifiers.
 ##
 ## The second output, @var{position}, provides the position, in characters,
 ## from the beginning of the file.
@@ -80,14 +83,18 @@
     format = "%f";
   endif
 
+  if (! ischar (format))
+    error ("textscan: FORMAT must be a string");
+  endif
+
+  ## Determine the number of data fields & initialize output array
+  num_fields = numel (strfind (format, "%")) - numel (strfind (format, "%*"));
+  C = cell (1, num_fields);
+
   if (! (isa (fid, "double") && fid > 0) && ! ischar (fid))
     error ("textscan: first argument must be a file id or character string");
   endif
 
-  if (! ischar (format))
-    error ("textscan: FORMAT must be a string");
-  endif
-
   args = varargin;
   if (nargin > 2 && isnumeric (args{1}))
     nlines = args{1};
@@ -96,7 +103,6 @@
   endif
   if (nlines < 1)
     printf ("textscan: N = 0, no data read\n");
-    C = [];
     return
   endif
 
@@ -174,7 +180,6 @@
   ## Check for empty result
   if (isempty (str))
     warning ("textscan: no data read");
-    C = [];
     return;
   endif
 
@@ -249,9 +254,15 @@
   ## Determine the number of data fields
   num_fields = numel (strfind (format, "%")) - numel (strfind (format, "%*"));
 
-  ## Strip trailing EOL to avoid returning stray missing values (f. strread)
-  if (strcmp (str(end-length (eol_char) + 1 : end), eol_char));
-    str(end-length (eol_char) + 1 : end) = "";
+  ## Strip trailing EOL to avoid returning stray missing values (cf. strread).
+  ## However, in case of CollectOutput request, presence of EOL is required
+  eol_at_end = strcmp (str(end-length (eol_char) + 1 : end), eol_char);
+  if (collop)
+    if (! eol_at_end)
+      str(end+1 : end+length (eol_char)) = eol_char;
+    endif
+  elseif (eol_at_end)
+     str(end-length (eol_char) + 1 : end) = "";
   endif
 
   ## Call strread to make it do the real work
@@ -316,14 +327,14 @@
 %! assert (b(1,:)', c{1}, 1e-5);
 %! assert (b(2,:)', c{2}, 1e-5);
 
-#%!test
-#%! str = "13, 72, NA, str1, 25\r\n// Middle line\r\n36, na, 05, str3, 6";
-#%! a = textscan (str, "%d %n %f %s %n", "delimiter", ",","treatAsEmpty", {"NA", "na"},"commentStyle", "//");
-#%! assert (a{1}, int32([13; 36]));
-#%! assert (a{2}, [72; NaN]);
-#%! assert (a{3}, [NaN; 5]);
-#%! assert (a{4}, {"str1"; "str3"});
-#%! assert (a{5}, [25; 6]);
+%!test
+%! str = "13, 72, NA, str1, 25\r\n// Middle line\r\n36, na, 05, str3, 6";
+%! a = textscan (str, "%d %n %f %s %n", "delimiter", ",","treatAsEmpty", {"NA", "na"},"commentStyle", "//");
+%! assert (a{1}, int32([13; 36]));
+%! assert (a{2}, [72; NaN]);
+%! assert (a{3}, [NaN; 5]);
+%! assert (a{4}, {"str1"; "str3"});
+%! assert (a{5}, [25; 6]);
 
 %!test
 %! str = "Km:10 = hhhBjjj miles16hour\r\n";
@@ -362,6 +373,21 @@
 %! assert (size(c{3}), [10, 2]);
 %! assert (size(c{2}), [10, 2]);
 
+%!test
+%% CollectOutput test with uneven column length files
+%! b = [10:10:100];
+%! b = [b; 8*b/5; 8*b*1000/5];
+%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b);
+%! str = [str "110 miles/hr"];
+%! fmt = "%f miles%s %s %f (%f) kilometers %*s";
+%! c = textscan (str, fmt, "collectoutput", 1);
+%! assert (size(c{1}), [11, 1]);
+%! assert (size(c{3}), [11, 2]);
+%! assert (size(c{2}), [11, 2]);
+%! assert (c{3}(end), NaN);
+%! assert (c{2}{11, 1}, "/hr");
+%! assert (isempty (c{2}{11, 2}), true);
+
 %% Test input validation
 %!error textscan ()
 %!error textscan (single (4))