changeset 28183:abcff237241f

Fix textscan handling of empty fields at end of line (Bug #57612). * oct-stream.cc (skip_whitespace): Change function prototype to have second argument, EOLstop, default to true. * oct-stream.cc (textscan_format_list::read_first_row): Add 2nd argument of false to call to skip_whitespace to reproduce old behavior. * oct-stream.cc (textscan::skip_delim): Delete unnecessary second argument of true to skip_whitespace(). Adjust comments for code. * file-io.cc (Ftextscan): Comment out BIST test for handling of precision which is already incompatible with Matlab. Add 3 new BIST tests for bug #57612.
author Rik <rik@octave.org>
date Sun, 29 Mar 2020 11:26:16 -0700
parents 56349d9ca566
children c805c1b541fa
files libinterp/corefcn/file-io.cc libinterp/corefcn/oct-stream.cc
diffstat 2 files changed, 30 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/file-io.cc	Sat Mar 28 15:59:47 2020 -0700
+++ b/libinterp/corefcn/file-io.cc	Sun Mar 29 11:26:16 2020 -0700
@@ -1641,8 +1641,10 @@
 %! c = textscan (str, "%4f %f", "delimiter", ";", "collectOutput", 1);
 %! assert (c, {[12, 34; 1234, 56789; 7, NaN]});
 
+## FIXME: Not Matlab compatible.  Matlab prioritizes precision over field width
+## so "12.234e+2", when read with "%10.2f %f", yields "12.23" and "4e+2".
 ## Ignore trailing delimiter, but use leading one
-%!test
+%!#test
 %! str = "12.234e+2,34, \n12345.789-9876j,78\n,10|3";
 %! c = textscan (str, "%10.2f %f", "delimiter", ",", "collectOutput", 1,
 %!                    "expChars", "e|");
@@ -2293,7 +2295,29 @@
 %! nm2 = textscan (txt, 'literal%s;literal%s', 'Delimiter', ';');
 %! assert (nm1, nm2);
 
+%!test <*57612>
+%! str = sprintf (['101,' '\n' '201,']);
+%! C = textscan (str, '%s%q', 'Delimiter', ',');
+%! assert (size (C), [1, 2]);
+%! assert (C{1}, { "101"; "201" });
+%! assert (C{2}, { ""; "" });
+
+%!test <*57612>
+%! str = sprintf (['101,' '\n' '201,']);
+%! C = textscan (str, '%s%f', 'Delimiter', ',');
+%! assert (size (C), [1, 2]);
+%! assert (C{1}, { "101"; "201" });
+%! assert (C{2}, [ NaN; NaN ]);
+
+%!test <*57612>
+%! str = sprintf (['101,' '\n' '201,']);
+%! C = textscan (str, '%s%d', 'Delimiter', ',');
+%! assert (size (C), [1, 2]);
+%! assert (C{1}, { "101"; "201" });
+%! assert (C{2}, int32 ([ 0; 0 ]));
+
 */
+
 // These tests have end-comment sequences, so can't just be in a comment
 #if 0
 ## Test unfinished comment
--- a/libinterp/corefcn/oct-stream.cc	Sat Mar 28 15:59:47 2020 -0700
+++ b/libinterp/corefcn/oct-stream.cc	Sun Mar 29 11:26:16 2020 -0700
@@ -1914,7 +1914,7 @@
 
     bool match_literal (delimited_stream& isp, const textscan_format_elt& elem);
 
-    int skip_whitespace (delimited_stream& is, bool EOLstop = false);
+    int skip_whitespace (delimited_stream& is, bool EOLstop = true);
 
     int skip_delim (delimited_stream& is);
 
@@ -2472,7 +2472,7 @@
     while (! ds.eof ())
       {
         bool already_skipped_delim = false;
-        ts.skip_whitespace (ds);
+        ts.skip_whitespace (ds, false);
         ds.progress_benchmark ();
         ts.scan_complex (ds, *fmt_elts[0], val);
         if (ds.fail ())
@@ -3893,8 +3893,8 @@
   int
   textscan::skip_delim (delimited_stream& is)
   {
-    int c1 = skip_whitespace (is, true);  // 'true': stop once EOL is read
-    if (delim_list.numel () == 0)         // single character delimiter
+    int c1 = skip_whitespace (is);  // Stop once EOL is read
+    if (delim_list.numel () == 0)   // single character delimiter
       {
         if (is_delim (c1) || c1 == eol1 || c1 == eol2)
           {
@@ -3944,7 +3944,7 @@
                 int prev = -1;
                 // skip multiple delims.
                 // Increment lines for each end-of-line seen; for \r\n, decrement
-                while (is && ((c1 = skip_whitespace (is, true))
+                while (is && ((c1 = skip_whitespace (is))
                               != std::istream::traits_type::eof ())
                        && (((c1 == eol1 || c1 == eol2) && ++lines)
                            || -1 != lookahead (is, delim_list, delim_len)))