Mercurial > octave
changeset 28182:56349d9ca566
Fix textscan when literal and delimiter might be the same character (bug #58008).
* file-io.cc (Ftextscan): Add BIST test for bug #58008.
* oct-stream.cc (textscan::read_format_once): Skip delimiters at beginning of
text before trying to match new fmt element. However, if fmt element is a
literal which begins with a delimiter character, don't skip, because text
must match exactly.
author | Rik <rik@octave.org> |
---|---|
date | Sat, 28 Mar 2020 15:59:47 -0700 |
parents | 2a47c8633982 |
children | abcff237241f |
files | libinterp/corefcn/file-io.cc libinterp/corefcn/oct-stream.cc |
diffstat | 2 files changed, 14 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/libinterp/corefcn/file-io.cc	Fri Mar 27 13:13:09 2020 -0700
+++ b/libinterp/corefcn/file-io.cc	Sat Mar 28 15:59:47 2020 -0700
@@ -2285,6 +2285,14 @@
 %! obs = textscan (str, "%q", "delimiter", ",");
 %! assert (obs, { { "a,b"; "c" } });
 
+%!test <*58008>
+%! txt = sprintf ('literal_other_1_1;literal_other_1_2\nliteral_other_2_1;literal_other_2_2\nliteral_other_3_1;literal_other_3_2');
+%! nm1 = textscan (txt, 'literal%s literal%s', 'Delimiter', ';');
+%! assert (nm1{1}, {"_other_1_1" ; "_other_2_1" ; "_other_3_1"});
+%! assert (nm1{2}, {"_other_1_2" ; "_other_2_2" ; "_other_3_2"});
+%! nm2 = textscan (txt, 'literal%s;literal%s', 'Delimiter', ';');
+%! assert (nm1, nm2);
+
 */
 // These tests have end-comment sequences, so can't just be in a comment
 #if 0
```
```diff
--- a/libinterp/corefcn/oct-stream.cc	Fri Mar 27 13:13:09 2020 -0700
+++ b/libinterp/corefcn/oct-stream.cc	Sat Mar 28 15:59:47 2020 -0700
@@ -3500,11 +3500,12 @@
         elem = fmt_list.next ();
         char *pos = is.tellg ();
 
-        // FIXME: these conversions "ignore delimiters". Should they include
-        // delimiters at the start of the conversion, or can those be skipped?
-        if (elem->type != textscan_format_elt::literal_conversion
-            // && elem->type != '[' && elem->type != '^' && elem->type != 'c'
-           )
+        // Skip delimiter before reading the next fmt conversion,
+        // unless the fmt is a string literal which begins with a delimiter,
+        // in which case the literal must match everything. Bug #58008
+        if (elem->type != textscan_format_elt::literal_conversion)
+          skip_delim (is);
+        else if (! is_delim (elem->text[0]))
           skip_delim (is);
 
         if (is.eof ())
```