comparison liboctave/util/lo-regexp.cc @ 31121:7d3bda173b63

regexp: Be more thorough in detecting named patterns. * liboctave/util/lo-regexp.cc (regexp::compile_internal): Check more thoroughly if named pattern expressions are complete before adapting syntax. * libinterp/corefcn/regexp.cc (Fregexp): Add tests.
author Markus Mützel <markus.muetzel@gmx.de>
date Mon, 04 Jul 2022 21:38:15 +0200
parents 5cf18ef0377c
children
comparison
equal deleted inserted replaced
31120:d8d90ba892b0 31121:7d3bda173b63
81 int inames = 0; 81 int inames = 0;
82 std::ostringstream buf; 82 std::ostringstream buf;
83 83
84 while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos) 84 while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos)
85 { 85 {
86 std::size_t tmp_pos;
86 if (m_pattern.size () > new_pos + 2 87 if (m_pattern.size () > new_pos + 2
87 && m_pattern.at (new_pos + 2) == '<' 88 && m_pattern.at (new_pos + 2) == '<'
88 && ! (m_pattern.size () > new_pos + 3 89 && ! (m_pattern.size () > new_pos + 3
89 && (m_pattern.at (new_pos + 3) == '=' 90 && (m_pattern.at (new_pos + 3) == '='
90 || m_pattern.at (new_pos + 3) == '!'))) 91 || m_pattern.at (new_pos + 3) == '!'))
92 && (tmp_pos = m_pattern.find_first_of ('>', new_pos))
93 != std::string::npos
94 && m_pattern.find_first_of (')', tmp_pos) != std::string::npos)
91 { 95 {
92 // The syntax of named tokens in pcre is "(?P<name>...)" while 96 // The syntax of named tokens in pcre is "(?P<name>...)" while
93 // we need a syntax "(?<name>...)", so fix that here. Also an 97 // we need a syntax "(?<name>...)", so fix that here. Also an
94 // expression like 98 // expression like
95 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)" 99 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)"
96 // should be perfectly legal, while pcre does not allow the same 100 // should be perfectly legal, while pcre does not allow the same
97 // named token name on both sides of the alternative. Also fix 101 // named token name on both sides of the alternative. Also fix
98 // that here by replacing name tokens by dummy names, and dealing 102 // that here by replacing name tokens by dummy names, and dealing
99 // with the dummy names later. 103 // with the dummy names later.
100
101 std::size_t tmp_pos = m_pattern.find_first_of ('>', new_pos);
102
103 if (tmp_pos == std::string::npos)
104 (*current_liboctave_error_handler)
105 ("regexp: syntax error in pattern");
106 104
107 std::string tmp_name 105 std::string tmp_name
108 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3); 106 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
109 107
110 bool found = false; 108 bool found = false;