Mercurial > octave
diff libinterp/corefcn/regexp.cc @ 24751:6e670c58c6f0
Expand octal sequences in regexprep replacement string for Matlab compatibility.
* regexp.cc (do_regexp_rep_string_escapes): Add code to expand octal escape
sequences.
* regexp.cc (Fregexp, Fregexprep): Add BIST tests and format tests to less
than 80 columns.
author | Rik <rik@octave.org> |
---|---|
date | Tue, 13 Feb 2018 13:51:35 -0800 |
parents | 00dfa167c1fe |
children | 0b9e7fcaab91 |
line wrap: on
line diff
--- a/libinterp/corefcn/regexp.cc Tue Feb 13 19:36:38 2018 +0100 +++ b/libinterp/corefcn/regexp.cc Tue Feb 13 13:51:35 2018 -0800 @@ -181,6 +181,30 @@ retval[i] = '\v'; break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': // octal input + { + size_t k; + int tmpi = s[j] - '0'; + for (k = j+1; k < std::min (j+3, len); k++) + { + int digit = s[k] - '0'; + if (digit < 0 || digit > 7) + break; + tmpi <<= 3; + tmpi += digit; + } + retval[i] = tmpi; + j = k - 1; + break; + } + case 'o': // octal input { bool bad_esc_seq = (j+1 >= len); @@ -1025,6 +1049,7 @@ %! assert (tokens.T1, "a"); %! assert (tokens.T2, "de"); +## Test options to regexp %!assert (regexp ("abc\nabc", '.'), [1:7]) %!assert (regexp ("abc\nabc", '.', 'dotall'), [1:7]) %!test @@ -1093,9 +1118,6 @@ %! assert (isempty (fieldnames (nm))); %! assert (sp, { "", "", "A", "", "E", "" }); -%!error regexp ('string', 'tri', 'BadArg') -%!error regexp ('string') - %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1,0)}) %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]}) %!assert (regexp ('Strings', {'t','s'}), {2, 7}) @@ -1149,6 +1171,7 @@ %! assert (a, {"oo"}); %! assert (b, {"f", " bar"}); +## Test escape sequences are expanded even in single-quoted strings %!assert (regexp ("\n", '\n'), 1) %!assert (regexp ("\n", "\n"), 1) @@ -1158,6 +1181,10 @@ %! assert (regexprep ('s', '(s)', 'x\$1y'), 'x$1y'); %! assert (regexprep ('s', '(s)', 'x\\$1y'), 'x\sy'); +## Test input validation +%!error regexp ('string', 'tri', 'BadArg') +%!error regexp ('string') + */ DEFUN (regexpi, args, nargout, @@ -1492,23 +1519,26 @@ ## Return the original if no match %!assert (regexprep ('hello', 'world', 'earth'), 'hello') -## Test emptymatch +## Test emptymatch option %!assert (regexprep ('World', '^', 'Hello '), 'World') %!assert (regexprep ('World', '^', 'Hello ', 'emptymatch'), 'Hello World') ## Test a general replacement %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g") -## Make sure it works at the beginning and end +## Make sure replacements work at the beginning and end of string %!assert (regexprep ("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g") %!assert (regexprep ("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_") -## Options -%!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), "a_b]c{d}e-f=g") -%!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), "a_b_c_d_e_f_g") +## Test options "once" and "ignorecase" +%!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), +%! "a_b]c{d}e-f=g") +%!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), +%! "a_b_c_d_e_f_g") ## Option combinations -%!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), "a_b]c{d}e-f=g") +%!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), +%! "a_b]c{d}e-f=g") ## End conditions on replacement %!assert (regexprep ("abc", "(b)", ".$1"), "a.bc") @@ -1521,14 +1551,22 @@ %!assert (regexprep ({"abc","cba"}, "b", "?"), {"a?c","c?a"}) %!assert (regexprep ({"abc","cba"}, {"b","a"}, {"?","!"}), {"!?c","c?!"}) -# Nasty lookbehind expression +## Nasty lookbehind expression %!test %! warning ("off", "Octave:regexp-lookbehind-limit", "local"); -%! assert (regexprep ('x^(-1)+y(-1)+z(-1)=0', '(?<=[a-z]+)\(\-[1-9]*\)', '_minus1'),'x^(-1)+y_minus1+z_minus1=0'); +%! assert (regexprep ('x^(-1)+y(-1)+z(-1)=0', '(?<=[a-z]+)\(\-[1-9]*\)', +%! '_minus1'),'x^(-1)+y_minus1+z_minus1=0'); +## Verify escape sequences in pattern %!assert (regexprep ("\n", '\n', "X"), "X") %!assert (regexprep ("\n", "\n", "X"), "X") +## Verify NULLs in pattern and replacement string +%!assert (regexprep ("A\0A", "\0", ","), "A,A") +%!assert (regexprep ("A\0A", '\0', ","), "A,A") +%!assert (regexprep ("A,A", "A", "B\0B"), "B\0B,B\0B") +%!assert (regexprep ("A,A", "A", 'B\0B'), "B\0B,B\0B") + ## Empty matches were broken on ARM architecture %!test <*52810> %! assert (strcmp (regexprep ("\nabc", "^(\t*)(abc)$", "$1$2", "lineanchors"), "\nabc"))