changeset 20325:b6a59cc96bfa

Process backslashes in regexprep replacement strings in a Matlab-compatible fashion (bug #45407). * NEWS: Announce change. Backslash-escaped ordinary characters are now replaced by the character itself, with no warning emitted. * regexp.cc (do_regexp_rep_string_escapes): Change the default case to silently convert an unrecognized escaped character to the bare character itself. Pass the escape sequences '\$' and '\\' through unchanged (with their escaping backslash), since these must be handled by lo-regexp.cc. * regexp.cc (Fregexprep): Add BIST tests for the new behavior.
author Rik <rik@octave.org>
date Mon, 29 Jun 2015 08:56:22 -0700
parents 13ede127ec9a
children 64f6d0543626
files NEWS libinterp/corefcn/regexp.cc
diffstat 2 files changed, 27 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Sun Jun 28 19:42:44 2015 -0700
+++ b/NEWS	Mon Jun 29 08:56:22 2015 -0700
@@ -12,6 +12,13 @@
     octal: '\oNNN' or '\o{NNN}'
     hex  : '\xNN'  or '\x{NN}'
 
+ ** Unknown escape sequences in the replacement string for regexprep are now
+    substituted with their unescaped version and no warning is emitted.
+    This change was made for Matlab compatibility.
+
+    Example: regexprep ('a', 'a', 'x\yz')
+             => 'xyz'
+
  ** mkfifo now interprets the MODE argument as an octal, not decimal, integer.
     This is consistent with the equivalent shell command. 
 
--- a/libinterp/corefcn/regexp.cc	Sun Jun 28 19:42:44 2015 -0700
+++ b/libinterp/corefcn/regexp.cc	Mon Jun 29 08:56:22 2015 -0700
@@ -245,9 +245,20 @@
               break;
             }
 
-            default:  // pass escape sequence through
-              retval[i] = '\\';
-              retval[++i] = s[j];
+            // Both dollar sign (for capture buffer) and backslash are
+            // passed through with their escape backslash.  The processing
+            // for these must occur during the actual replacement operation
+            // in lo-regexp.cc.
+            case '$':  // pass dollar sign through with escape
+              retval[i] = '\\'; retval[++i] = '$';
+              break;
+
+            case '\\': // pass backslash through with escape
+              retval[i] = '\\'; retval[++i] = '\\';
+              break;
+
+            default:   // convert escaped character to unescaped char
+              retval[i] = s[j];
               break;
             }
         }
@@ -1151,6 +1162,12 @@
 
 %!assert (regexp ("\n", '\n'), 1);
 %!assert (regexp ("\n", "\n"), 1);
+
+%!test  # Bug #45407, escape sequences are silently converted
+%! assert (regexprep ('s', 's', 'x\.y'), 'x.y');
+%! assert (regexprep ('s', '(s)', 'x\$1y'), 'x$1y');
+%! assert (regexprep ('s', '(s)', 'x\\$1y'), 'x\sy');
+
 */
 
 DEFUN (regexpi, args, nargout,