changeset 31121:7d3bda173b63

regexp: Be more thorough in detecting named patterns. * liboctave/util/lo-regexp.cc (regexp::compile_internal): Check more thoroughly if named pattern expressions are complete before adapting syntax. * libinterp/corefcn/regexp.cc (Fregexp): Add tests.
author Markus Mützel <markus.muetzel@gmx.de>
date Mon, 04 Jul 2022 21:38:15 +0200
parents d8d90ba892b0
children 46e15523ca06
files libinterp/corefcn/regexp.cc liboctave/util/lo-regexp.cc
diffstat 2 files changed, 11 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/regexp.cc	Mon Jul 04 21:36:48 2022 +0200
+++ b/libinterp/corefcn/regexp.cc	Mon Jul 04 21:38:15 2022 +0200
@@ -1200,6 +1200,12 @@
 %! assert (regexp ('foo!+bar\nbar!+foo', '.\>'), [3, 4, 8, 13, 14, 18]);
 %! assert (regexp ('foo!+bar\nbar!+foo', '\<\w'), [1, 6, 10, 16]);
 
+## Test "incomplete" named patterns
+%!assert <*62705> (regexpi ('<', '\(?<'), 1)
+%!assert <*62705> (regexpi ('<n>', '\(?<n\>'), 1)
+%!assert <*62705> (regexpi ('<n>', '\(?<n\>\)?'), 1)
+%!assert <62705> (regexpi ('<n>a', '\(?<n\>a\)?'), 1)
+
 ## Test input validation
 %!error regexp ('string', 'tri', 'BadArg')
 %!error regexp ('string')
--- a/liboctave/util/lo-regexp.cc	Mon Jul 04 21:36:48 2022 +0200
+++ b/liboctave/util/lo-regexp.cc	Mon Jul 04 21:38:15 2022 +0200
@@ -83,11 +83,15 @@
 
     while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos)
       {
+        std::size_t tmp_pos;
         if (m_pattern.size () > new_pos + 2
             && m_pattern.at (new_pos + 2) == '<'
             && ! (m_pattern.size () > new_pos + 3
                   && (m_pattern.at (new_pos + 3) == '='
-                      || m_pattern.at (new_pos + 3) == '!')))
+                      || m_pattern.at (new_pos + 3) == '!'))
+            && (tmp_pos = m_pattern.find_first_of ('>', new_pos))
+               != std::string::npos
+            && m_pattern.find_first_of (')', tmp_pos) != std::string::npos)
           {
             // The syntax of named tokens in pcre is "(?P<name>...)" while
             // we need a syntax "(?<name>...)", so fix that here.  Also an
@@ -98,12 +102,6 @@
             // that here by replacing name tokens by dummy names, and dealing
             // with the dummy names later.
 
-            std::size_t tmp_pos = m_pattern.find_first_of ('>', new_pos);
-
-            if (tmp_pos == std::string::npos)
-              (*current_liboctave_error_handler)
-                ("regexp: syntax error in pattern");
-
             std::string tmp_name
               = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);