Mercurial > jwe > octave
comparison liboctave/util/lo-regexp.cc @ 31121:7d3bda173b63
regexp: Be more thorough in detecting named patterns.
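* liboctave/util/lo-regexp.cc (regexp::compile_internal): Adapt the syntax of
a named pattern only if the expression is complete, i.e., a closing '>' and a
subsequent ')' are found. No longer raise a syntax error at this point when
the closing '>' is missing.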
* libinterp/corefcn/regexp.cc (Fregexp): Add tests.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Mon, 04 Jul 2022 21:38:15 +0200 |
parents | 5cf18ef0377c |
children |
comparison
equal
deleted
inserted
replaced
31120:d8d90ba892b0 | 31121:7d3bda173b63 |
---|---|
81 int inames = 0; | 81 int inames = 0; |
82 std::ostringstream buf; | 82 std::ostringstream buf; |
83 | 83 |
84 while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos) | 84 while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos) |
85 { | 85 { |
86 std::size_t tmp_pos; | |
86 if (m_pattern.size () > new_pos + 2 | 87 if (m_pattern.size () > new_pos + 2 |
87 && m_pattern.at (new_pos + 2) == '<' | 88 && m_pattern.at (new_pos + 2) == '<' |
88 && ! (m_pattern.size () > new_pos + 3 | 89 && ! (m_pattern.size () > new_pos + 3 |
89 && (m_pattern.at (new_pos + 3) == '=' | 90 && (m_pattern.at (new_pos + 3) == '=' |
90 || m_pattern.at (new_pos + 3) == '!'))) | 91 || m_pattern.at (new_pos + 3) == '!')) |
92 && (tmp_pos = m_pattern.find_first_of ('>', new_pos)) | |
93 != std::string::npos | |
94 && m_pattern.find_first_of (')', tmp_pos) != std::string::npos) | |
91 { | 95 { |
92 // The syntax of named tokens in pcre is "(?P<name>...)" while | 96 // The syntax of named tokens in pcre is "(?P<name>...)" while |
93 // we need a syntax "(?<name>...)", so fix that here. Also an | 97 // we need a syntax "(?<name>...)", so fix that here. Also an |
94 // expression like | 98 // expression like |
95 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)" | 99 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)" |
96 // should be perfectly legal, while pcre does not allow the same | 100 // should be perfectly legal, while pcre does not allow the same |
97 // named token name on both sides of the alternative. Also fix | 101 // named token name on both sides of the alternative. Also fix |
98 // that here by replacing name tokens by dummy names, and dealing | 102 // that here by replacing name tokens by dummy names, and dealing |
99 // with the dummy names later. | 103 // with the dummy names later. |
100 | |
101 std::size_t tmp_pos = m_pattern.find_first_of ('>', new_pos); | |
102 | |
103 if (tmp_pos == std::string::npos) | |
104 (*current_liboctave_error_handler) | |
105 ("regexp: syntax error in pattern"); | |
106 | 104 |
107 std::string tmp_name | 105 std::string tmp_name |
108 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3); | 106 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3); |
109 | 107 |
110 bool found = false; | 108 bool found = false; |