changeset 28182:56349d9ca566

Fix textscan when literal and delimiter might be the same character (bug #58008).

* file-io.cc (Ftextscan): Add BIST test for bug #58008.

* oct-stream.cc (textscan::read_format_once): Skip delimiters at beginning of text before trying to match new fmt element. However, if fmt element is a literal which begins with a delimiter character, don't skip, because text must match exactly.
author Rik <rik@octave.org>
date Sat, 28 Mar 2020 15:59:47 -0700
parents 2a47c8633982
children abcff237241f
files libinterp/corefcn/file-io.cc libinterp/corefcn/oct-stream.cc
diffstat 2 files changed, 14 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/file-io.cc	Fri Mar 27 13:13:09 2020 -0700
+++ b/libinterp/corefcn/file-io.cc	Sat Mar 28 15:59:47 2020 -0700
@@ -2285,6 +2285,14 @@
 %! obs = textscan (str, "%q", "delimiter", ",");
 %! assert (obs, { { "a,b"; "c" } });
 
+%!test <*58008>
+%! txt = sprintf ('literal_other_1_1;literal_other_1_2\nliteral_other_2_1;literal_other_2_2\nliteral_other_3_1;literal_other_3_2');
+%! nm1 = textscan (txt, 'literal%s literal%s', 'Delimiter', ';');
+%! assert (nm1{1}, {"_other_1_1" ; "_other_2_1" ; "_other_3_1"});
+%! assert (nm1{2}, {"_other_1_2" ; "_other_2_2" ; "_other_3_2"});
+%! nm2 = textscan (txt, 'literal%s;literal%s', 'Delimiter', ';');
+%! assert (nm1, nm2);
+
 */
 // These tests have end-comment sequences, so can't just be in a comment
 #if 0
--- a/libinterp/corefcn/oct-stream.cc	Fri Mar 27 13:13:09 2020 -0700
+++ b/libinterp/corefcn/oct-stream.cc	Sat Mar 28 15:59:47 2020 -0700
@@ -3500,11 +3500,12 @@
         elem = fmt_list.next ();
         char *pos = is.tellg ();
 
-        // FIXME: these conversions "ignore delimiters".  Should they include
-        // delimiters at the start of the conversion, or can those be skipped?
-        if (elem->type != textscan_format_elt::literal_conversion
-            // && elem->type != '[' && elem->type != '^' && elem->type != 'c'
-           )
+        // Skip delimiter before reading the next fmt conversion,
+        // unless the fmt is a string literal which begins with a delimiter,
+        // in which case the literal must match everything.  Bug #58008
+        if (elem->type != textscan_format_elt::literal_conversion)
+          skip_delim (is);
+        else if (! is_delim (elem->text[0]))
           skip_delim (is);
 
         if (is.eof ())