# HG changeset patch # User Markus Mützel # Date 1656963495 -7200 # Node ID 7d3bda173b6310e853706744f6d496476fbc0a93 # Parent d8d90ba892b03e8aaf8434324f01a674cb6ffdae regexp: Be more thorough in detecting named patterns. * liboctave/util/lo-regexp.cc (regexp::compile_internal): Check more thoroughly if named pattern expressions are complete before adapting syntax. * libinterp/corefcn/regexp.cc (Fregexp): Add tests. diff -r d8d90ba892b0 -r 7d3bda173b63 libinterp/corefcn/regexp.cc --- a/libinterp/corefcn/regexp.cc Mon Jul 04 21:36:48 2022 +0200 +++ b/libinterp/corefcn/regexp.cc Mon Jul 04 21:38:15 2022 +0200 @@ -1200,6 +1200,12 @@ %! assert (regexp ('foo!+bar\nbar!+foo', '.\>'), [3, 4, 8, 13, 14, 18]); %! assert (regexp ('foo!+bar\nbar!+foo', '\<\w'), [1, 6, 10, 16]); +## Test "incomplete" named patterns +%!assert <*62705> (regexpi ('<', '\(?<'), 1) +%!assert <*62705> (regexpi ('', '\(?'), 1) +%!assert <*62705> (regexpi ('', '\(?\)?'), 1) +%!assert <62705> (regexpi ('a', '\(?a\)?'), 1) + ## Test input validation %!error regexp ('string', 'tri', 'BadArg') %!error regexp ('string') diff -r d8d90ba892b0 -r 7d3bda173b63 liboctave/util/lo-regexp.cc --- a/liboctave/util/lo-regexp.cc Mon Jul 04 21:36:48 2022 +0200 +++ b/liboctave/util/lo-regexp.cc Mon Jul 04 21:38:15 2022 +0200 @@ -83,11 +83,15 @@ while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos) { + std::size_t tmp_pos; if (m_pattern.size () > new_pos + 2 && m_pattern.at (new_pos + 2) == '<' && ! (m_pattern.size () > new_pos + 3 && (m_pattern.at (new_pos + 3) == '=' - || m_pattern.at (new_pos + 3) == '!'))) + || m_pattern.at (new_pos + 3) == '!')) + && (tmp_pos = m_pattern.find_first_of ('>', new_pos)) + != std::string::npos + && m_pattern.find_first_of (')', tmp_pos) != std::string::npos) { // The syntax of named tokens in pcre is "(?P<name>...)" while // we need a syntax "(?<name>...)", so fix that here.
Also an @@ -98,12 +102,6 @@ // that here by replacing name tokens by dummy names, and dealing // with the dummy names later. - std::size_t tmp_pos = m_pattern.find_first_of ('>', new_pos); - - if (tmp_pos == std::string::npos) - (*current_liboctave_error_handler) - ("regexp: syntax error in pattern"); - std::string tmp_name = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);