Mercurial > jwe > octave
diff libinterp/corefcn/regexp.cc @ 29350:8f0d0d4690c0
Change regexp start-of-word/end-of-word behavior for Matlab compatibility (bug #59992).
* regexp.cc (do_regexp_ptn_string_escapes): Resize retval to length of input
string rather than "length + 0" (random, unrelated change). Translate
start-of-word ('\<') to positive look-behind pattern '(?<=\W|^)'. Translate
end-of-word ('\>') to positive look-ahead pattern '(?=\W|$)'. Add BIST test for
bug #59992.
author | Rik <rik@octave.org> |
---|---|
date | Tue, 02 Feb 2021 14:50:47 -0800 |
parents | 10a35049bad7 |
children | 7854d5752dd2 |
line wrap: on
line diff
```diff
--- a/libinterp/corefcn/regexp.cc Tue Feb 02 16:40:33 2021 -0500
+++ b/libinterp/corefcn/regexp.cc Tue Feb 02 14:50:47 2021 -0800
@@ -60,7 +60,7 @@
   size_t j = 0;
   size_t len = s.length ();
 
-  retval.resize (len+i);
+  retval.resize (len);
 
   while (j < len)
     {
@@ -79,11 +79,15 @@
                 }
               break;
 
-            // Translate \< and \> to PCRE word boundary
+            // Translate \< and \> to PCRE patterns for pseudo-word boundary
             case '<': // begin word boundary
+              retval.insert (i, "(?<=\\W|^)");
+              i += 8;
+              break;
+
             case '>': // end word boundary
-              retval[i] = '\\';
-              retval[++i] = 'b';
+              retval.insert (i, "(?=\\W|$)");
+              i += 7;
               break;
 
             case 'o': // octal input
@@ -1178,12 +1182,19 @@
 %!assert (regexp ("\n", '\n'), 1)
 %!assert (regexp ("\n", "\n"), 1)
 
-# Test escape sequences are silently converted
+## Test escape sequences are silently converted
 %!test <*45407>
 %! assert (regexprep ('s', 's', 'x\.y'), 'x.y');
 %! assert (regexprep ('s', '(s)', 'x\$1y'), 'x$1y');
 %! assert (regexprep ('s', '(s)', 'x\\$1y'), 'x\sy');
 
+## Test start-of-word / end-of-word patterns for Matlab compatibility
+%!test <*59992>
+%! assert (regexp ('foo!+bar', '\<\w'), [1, 6]);
+%! assert (regexp ('foo!+bar', '.\>'), [3, 4, 8]);
+%! assert (regexp ('foo!+bar\nbar!+foo', '.\>'), [3, 4, 8, 13, 14, 18]);
+%! assert (regexp ('foo!+bar\nbar!+foo', '\<\w'), [1, 6, 10, 16]);
+
 ## Test input validation
 %!error regexp ('string', 'tri', 'BadArg')
 %!error regexp ('string')
```