# HG changeset patch # User Rik # Date 1432228947 25200 # Node ID f2bc7d23295d8b205ac5edf75831c02f376ba167 # Parent 29eb47fe8e8c68c59cf7769336fc6e109bea9be7 Add special hex/octal escape sequence processing for regexp. * NEWS: Announce change done for Matlab compatibility. * regexp.cc (do_regexp_ptn_string_escapes): Add case for parsing '\oNNN' or '\o{NNN}' octal sequences. PCRE already handles the hex case. * regexp.cc (do_regexp_rep_string_escapes): Add case for parsing '\oNNN' or '\o{NNN}' octal sequences. Add case for parsing '\xNN' or '\x{NN}'. diff -r 29eb47fe8e8c -r f2bc7d23295d NEWS --- a/NEWS Wed May 20 17:18:41 2015 -0700 +++ b/NEWS Thu May 21 10:22:27 2015 -0700 @@ -6,6 +6,12 @@ The *printf family of functions now supports octal and hex escape sequences in single-quoted strings for Matlab compatibility. + ** Special octal and hex escape sequences for the pattern and replacement + strings in regular expressions are now interpreted for Matlab compatibility. + + octal: '\oNNN' or '\o{NNN}' + hex : '\xNN' or '\x{NN}' + ** mkfifo now interprets the MODE argument as an octal, not decimal, integer. This is consistent with the equivalent shell command. diff -r 29eb47fe8e8c -r f2bc7d23295d libinterp/corefcn/regexp.cc --- a/libinterp/corefcn/regexp.cc Wed May 20 17:18:41 2015 -0700 +++ b/libinterp/corefcn/regexp.cc Thu May 21 10:22:27 2015 -0700 @@ -77,13 +77,38 @@ retval[++i] = 'b'; break; -#if 0 -// FIXME: To be complete, we need to handle \oN, \o{N}. -// The PCRE library already handles \N where N -// is an octal number. New code needs to merely -// replace \oN or \o{N} with \N. - case 'o': // octal number -#endif + case 'o': // octal input + { + bool bad_esc_seq = (j+1 >= len); + + bool brace = false; + if (! bad_esc_seq && s[++j] == '{') + { + brace = true; + j++; + } + + int tmpi = 0; + size_t k; + for (k = j; k < std::min (j+3+brace, len); k++) + { + int digit = s[k] - '0'; + if (digit < 0 || digit > 7) + break; + tmpi <<= 3; + tmpi += digit; + } + if (bad_esc_seq || (brace && s[k++] != '}')) + { + bad_esc_seq = true; + tmpi = 0; + warning ("malformed octal escape sequence '\\o' --\ + converting to '\\0'"); + } + retval[i] = tmpi; + j = k - 1; + break; + } default: // pass escape sequence through retval[i] = '\\'; @@ -150,14 +175,75 @@ retval[i] = '\v'; break; -#if 0 -// FIXME: to be complete, we need to handle \oN, \o{N}, \xN, and -// \x{N}. Hex digits may be upper or lower case. Brackets are -// optional, so \x5Bz is the same as \x{5B}z. + case 'o': // octal input + { + bool bad_esc_seq = (j+1 >= len); + + bool brace = false; + if (! bad_esc_seq && s[++j] == '{') + { + brace = true; + j++; + } + + int tmpi = 0; + size_t k; + for (k = j; k < std::min (j+3+brace, len); k++) + { + int digit = s[k] - '0'; + if (digit < 0 || digit > 7) + break; + tmpi <<= 3; + tmpi += digit; + } + if (bad_esc_seq || (brace && s[k++] != '}')) + { + warning ("malformed octal escape sequence '\\o' --\ + converting to '\\0'"); + tmpi = 0; + } + retval[i] = tmpi; + j = k - 1; + break; + } - case 'o': // octal number - case 'x': // hex number -#endif + case 'x': // hex input + { + bool bad_esc_seq = (j+1 >= len); + + bool brace = false; + if (! bad_esc_seq && s[++j] == '{') + { + brace = true; + j++; + } + + int tmpi = 0; + size_t k; + for (k = j; k < std::min (j+2+brace, len); k++) + { + if (! isxdigit (s[k])) + break; + + tmpi <<= 4; + int digit = s[k]; + if (digit >= 'a') + tmpi += digit - 'a' + 10; + else if (digit >= 'A') + tmpi += digit - 'A' + 10; + else + tmpi += digit - '0'; + } + if (bad_esc_seq || (brace && s[k++] != '}')) + { + warning ("malformed hex escape sequence '\\x' --\ + converting to '\\0'"); + tmpi = 0; + } + retval[i] = tmpi; + j = k - 1; + break; + } default: // pass escape sequence through retval[i] = '\\';