Mercurial > octave
diff libinterp/corefcn/regexp.cc @ 15541:9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
* NEWS: Give an example of how escape sequence processing in single-quoted
regular expressions works.
* libinterp/corefcn/regexp.cc(do_regexp_string_escapes): Rename to
do_regexp_ptn_string_escapes. The only sequence expanded is '\b' (backspace);
all other escape sequences are passed through unchanged for PCRE to handle.
* libinterp/corefcn/regexp.cc(do_regexp_rep_string_escapes): New function to
do escape sequence processing for the replacement string since the sequences
to expand differ from those of the regexp pattern.
* liboctave/util/regexp.cc(regexp::replace): Process backslashes in replacement
string so that '\$1' yields a literal '$1' rather than being replaced by the
first capture buffer.
author | Rik <rik@octave.org> |
---|---|
date | Wed, 17 Oct 2012 20:13:19 -0700 |
parents | 2fc554ffbc28 |
children | 7eff3032d144 |
line wrap: on
line diff
--- a/libinterp/corefcn/regexp.cc Wed Oct 17 15:56:33 2012 -0700 +++ b/libinterp/corefcn/regexp.cc Wed Oct 17 20:13:19 2012 -0700 @@ -45,12 +45,12 @@ #include "utils.h" // Replace backslash escapes in a string with the real values. We need -// this special function instead of the one in utils.cc because the set -// of escape sequences used in regexps is different from those used in -// the *printf functions. +// two special functions instead of the one in utils.cc because the set +// of escape sequences used for regexp patterns and replacement strings +// is different from those used in the *printf functions. static std::string -do_regexp_string_escapes (const std::string& s) +do_regexp_ptn_string_escapes (const std::string& s) { std::string retval; @@ -66,11 +66,56 @@ { switch (s[++j]) { - case '$': - retval[i] = '$'; + case 'b': // backspace + retval[i] = '\b'; break; - case 'a': +#if 0 +// FIXME : To be complete, we need to handle \oN, \o{N}. +// The PCRE library already handles \N where N +// is an octal number. New code needs to merely +// replace \oN or \o{N} with \N. + case 'o': // octal number +#endif + + default: // pass escape sequence through + retval[i] = '\\'; + retval[++i] = s[j]; + break; + } + } + else + { + retval[i] = s[j]; + } + + i++; + j++; + } + + retval.resize (i); + + return retval; +} + +static std::string +do_regexp_rep_string_escapes (const std::string& s) +{ + std::string retval; + + size_t i = 0; + size_t j = 0; + size_t len = s.length (); + + retval.resize (len); + + while (j < len) + { + if (s[j] == '\\' && j+1 < len) + { + switch (s[++j]) + { + case 'a': // alarm retval[i] = '\a'; break; @@ -98,10 +143,6 @@ retval[i] = '\v'; break; - case '\\': // backslash - retval[i] = '\\'; - break; - #if 0 // FIXME -- to be complete, we need to handle \oN, \o{N}, \xN, and // \x{N}. Hex digits may be upper or lower case. 
Brackets are @@ -110,8 +151,8 @@ case 'o': // octal number case 'x': // hex number #endif - - default: + + default: // pass escape sequence through retval[i] = '\\'; retval[++i] = s[j]; break; @@ -205,7 +246,7 @@ return retval; // Matlab compatibility. if (args(1).is_sq_string ()) - pattern = do_regexp_string_escapes (pattern); + pattern = do_regexp_ptn_string_escapes (pattern); regexp::opts options; options.case_insensitive (case_insensitive); @@ -1196,14 +1237,14 @@ return retval; // Matlab compatibility. if (args(1).is_sq_string ()) - pattern = do_regexp_string_escapes (pattern); + pattern = do_regexp_ptn_string_escapes (pattern); std::string replacement = args(2).string_value (); if (error_state) return retval; // Matlab compatibility. if (args(2).is_sq_string ()) - replacement = do_regexp_string_escapes (replacement); + replacement = do_regexp_rep_string_escapes (replacement); // Pack options excluding 'tokenize' and various output // reordering strings into regexp arg list