Mercurial > jwe > octave
changeset 31121:7d3bda173b63
regexp: Be more thorough in detecting named patterns.
* liboctave/util/lo-regexp.cc (regexp::compile_internal): Check more thoroughly
whether named pattern expressions are complete before adapting the syntax.
* libinterp/corefcn/regexp.cc (Fregexp): Add tests.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Mon, 04 Jul 2022 21:38:15 +0200 |
parents | d8d90ba892b0 |
children | 46e15523ca06 |
files | libinterp/corefcn/regexp.cc liboctave/util/lo-regexp.cc |
diffstat | 2 files changed, 11 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/regexp.cc Mon Jul 04 21:36:48 2022 +0200 +++ b/libinterp/corefcn/regexp.cc Mon Jul 04 21:38:15 2022 +0200 @@ -1200,6 +1200,12 @@ %! assert (regexp ('foo!+bar\nbar!+foo', '.\>'), [3, 4, 8, 13, 14, 18]); %! assert (regexp ('foo!+bar\nbar!+foo', '\<\w'), [1, 6, 10, 16]); +## Test "incomplete" named patterns +%!assert <*62705> (regexpi ('<', '\(?<'), 1) +%!assert <*62705> (regexpi ('<n>', '\(?<n\>'), 1) +%!assert <*62705> (regexpi ('<n>', '\(?<n\>\)?'), 1) +%!assert <62705> (regexpi ('<n>a', '\(?<n\>a\)?'), 1) + ## Test input validation %!error regexp ('string', 'tri', 'BadArg') %!error regexp ('string')
--- a/liboctave/util/lo-regexp.cc Mon Jul 04 21:36:48 2022 +0200 +++ b/liboctave/util/lo-regexp.cc Mon Jul 04 21:38:15 2022 +0200 @@ -83,11 +83,15 @@ while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos) { + std::size_t tmp_pos; if (m_pattern.size () > new_pos + 2 && m_pattern.at (new_pos + 2) == '<' && ! (m_pattern.size () > new_pos + 3 && (m_pattern.at (new_pos + 3) == '=' - || m_pattern.at (new_pos + 3) == '!'))) + || m_pattern.at (new_pos + 3) == '!')) + && (tmp_pos = m_pattern.find_first_of ('>', new_pos)) + != std::string::npos + && m_pattern.find_first_of (')', tmp_pos) != std::string::npos) { // The syntax of named tokens in pcre is "(?P<name>...)" while // we need a syntax "(?<name>...)", so fix that here. Also an @@ -98,12 +102,6 @@ // that here by replacing name tokens by dummy names, and dealing // with the dummy names later. - std::size_t tmp_pos = m_pattern.find_first_of ('>', new_pos); - - if (tmp_pos == std::string::npos) - (*current_liboctave_error_handler) - ("regexp: syntax error in pattern"); - std::string tmp_name = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);