# HG changeset patch # User Rik # Date 1585506376 25200 # Node ID abcff237241f79118f25a647389797586979d627 # Parent 56349d9ca5667e6673f38642302ddf0fc8dbc3f4 Fix textscan handling of empty fields at end of line (Bug #57612). * oct-stream.cc (skip_whitespace): Change function prototype to have second argument, EOLstop, default to true. * oct-stream.cc (textscan_format_list::read_first_row): Add 2nd argument of false to call to skip_whitespace to reproduce old behavior. * oct-stream.cc (textscan::skip_delim): Delete unnecessary second argument of true to skip_whitespace(). Adjust comments for code. * file-io.cc (Ftextscan): Comment out BIST test for handling of precision which is already incompatible with Matlab. Add 3 new BIST tests for bug #57612. diff -r 56349d9ca566 -r abcff237241f libinterp/corefcn/file-io.cc --- a/libinterp/corefcn/file-io.cc Sat Mar 28 15:59:47 2020 -0700 +++ b/libinterp/corefcn/file-io.cc Sun Mar 29 11:26:16 2020 -0700 @@ -1641,8 +1641,10 @@ %! c = textscan (str, "%4f %f", "delimiter", ";", "collectOutput", 1); %! assert (c, {[12, 34; 1234, 56789; 7, NaN]}); +## FIXME: Not Matlab compatible. Matlab prioritizes precision over field width +## so "12.234e+2", when read with "%10.2f %f", yields "12.23" and "4e+2". ## Ignore trailing delimiter, but use leading one -%!test +%!#test %! str = "12.234e+2,34, \n12345.789-9876j,78\n,10|3"; %! c = textscan (str, "%10.2f %f", "delimiter", ",", "collectOutput", 1, %! "expChars", "e|"); @@ -2293,7 +2295,29 @@ %! nm2 = textscan (txt, 'literal%s;literal%s', 'Delimiter', ';'); %! assert (nm1, nm2); +%!test <*57612> +%! str = sprintf (['101,' '\n' '201,']); +%! C = textscan (str, '%s%q', 'Delimiter', ','); +%! assert (size (C), [1, 2]); +%! assert (C{1}, { "101"; "201" }); +%! assert (C{2}, { ""; "" }); + +%!test <*57612> +%! str = sprintf (['101,' '\n' '201,']); +%! C = textscan (str, '%s%f', 'Delimiter', ','); +%! assert (size (C), [1, 2]); +%! assert (C{1}, { "101"; "201" }); +%! 
assert (C{2}, [ NaN; NaN ]); + +%!test <*57612> +%! str = sprintf (['101,' '\n' '201,']); +%! C = textscan (str, '%s%d', 'Delimiter', ','); +%! assert (size (C), [1, 2]); +%! assert (C{1}, { "101"; "201" }); +%! assert (C{2}, int32 ([ 0; 0 ])); + */ + // These tests have end-comment sequences, so can't just be in a comment #if 0 ## Test unfinished comment diff -r 56349d9ca566 -r abcff237241f libinterp/corefcn/oct-stream.cc --- a/libinterp/corefcn/oct-stream.cc Sat Mar 28 15:59:47 2020 -0700 +++ b/libinterp/corefcn/oct-stream.cc Sun Mar 29 11:26:16 2020 -0700 @@ -1914,7 +1914,7 @@ bool match_literal (delimited_stream& isp, const textscan_format_elt& elem); - int skip_whitespace (delimited_stream& is, bool EOLstop = false); + int skip_whitespace (delimited_stream& is, bool EOLstop = true); int skip_delim (delimited_stream& is); @@ -2472,7 +2472,7 @@ while (! ds.eof ()) { bool already_skipped_delim = false; - ts.skip_whitespace (ds); + ts.skip_whitespace (ds, false); ds.progress_benchmark (); ts.scan_complex (ds, *fmt_elts[0], val); if (ds.fail ()) @@ -3893,8 +3893,8 @@ int textscan::skip_delim (delimited_stream& is) { - int c1 = skip_whitespace (is, true); // 'true': stop once EOL is read - if (delim_list.numel () == 0) // single character delimiter + int c1 = skip_whitespace (is); // Stop once EOL is read + if (delim_list.numel () == 0) // single character delimiter { if (is_delim (c1) || c1 == eol1 || c1 == eol2) { @@ -3944,7 +3944,7 @@ int prev = -1; // skip multiple delims. // Increment lines for each end-of-line seen; for \r\n, decrement - while (is && ((c1 = skip_whitespace (is, true)) + while (is && ((c1 = skip_whitespace (is)) != std::istream::traits_type::eof ()) && (((c1 == eol1 || c1 == eol2) && ++lines) || -1 != lookahead (is, delim_list, delim_len)))