Mercurial > octave
diff src/DLD-FUNCTIONS/regexp.cc @ 14024:fc9f204faea0
refactor regexp (bug #34440)
* liboctave/regexp.h, liboctave/regexp.cc: New files.
Provide classes and functions for regular expressions.
Adapted from src/DLD-FUNCTIONS/regexp.cc.
* regex-match.h, regex-match.cc: Delete.
* liboctave/Makefile.am (INCS, LIBOCTAVE_CXX_SOURCES): Update.
* variables.cc (name_matches_any_pattern): Use new regexp class.
* symtab.h (symbol_table::regexp_global_variables,
symbol_table::do_clear_variable_regexp, symbol_table::do_regexp):
Likewise.
* DLD-FUNCTIONS/regexp.cc (parse_options): New function.
(octregexp, octcellregexp, octregexprep): Extract matching code for
use in new regexp class. Use new regexp class to provide required
functionality.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Sun, 11 Dec 2011 22:19:57 -0500 |
parents | 9cae456085c2 |
children | 72c96de7a403 |
line wrap: on
line diff
--- a/src/DLD-FUNCTIONS/regexp.cc Sun Dec 11 18:28:35 2011 -0500 +++ b/src/DLD-FUNCTIONS/regexp.cc Sun Dec 11 22:19:57 2011 -0500 @@ -25,501 +25,106 @@ #include <config.h> #endif -#include <algorithm> +#include <list> #include <sstream> -#include "defun-dld.h" -#include "error.h" -#include "gripes.h" -#include "oct-obj.h" -#include "utils.h" - -#include "Cell.h" -#include "oct-map.h" -#include "str-vec.h" -#include "quit.h" -#include "parse.h" -#include "oct-locbuf.h" - #include <pcre.h> -// Define the maximum number of retries for a pattern that -// possibly results in an infinite recursion. -#define PCRE_MATCHLIMIT_MAX 10 - -// The regexp is constructed as a linked list to avoid resizing the -// return values in arrays at each new match. - -// FIXME don't bother collecting and composing return values the user -// doesn't want. - -class regexp_elem -{ -public: - regexp_elem (const string_vector& _named_token, const Cell& _t, - const std::string& _m, const Matrix& _te, double _s, - double _e) : - named_token (_named_token), t (_t), m (_m), te (_te), s (_s), e (_e) { } - - regexp_elem (const regexp_elem &a) : named_token (a.named_token), t (a.t), - m (a.m), te (a.te), s (a.s), e (a.e) - { } - - string_vector named_token; - Cell t; - std::string m; - Matrix te; - double s; - double e; -}; - -typedef std::list<regexp_elem>::const_iterator const_iterator; - -#define MAXLOOKBEHIND 10 -static bool lookbehind_warned = false; +#include "base-list.h" +#include "oct-locbuf.h" +#include "quit.h" +#include "regexp.h" +#include "str-vec.h" -static int -octregexp_list (const octave_value_list &args, const std::string &nm, - bool case_insensitive, std::list<regexp_elem> &lst, - string_vector &named, int &nopts, bool &once) -{ - int sz = 0; - - int nargin = args.length (); - bool lineanchors = false; - bool dotexceptnewline = false; - bool freespacing = false; - - nopts = nargin - 2; - once = false; +#include "defun-dld.h" +#include "Cell.h" +#include "error.h" +#include 
"gripes.h" +#include "oct-map.h" +#include "oct-obj.h" +#include "utils.h" - std::string buffer = args(0).string_value (); - size_t max_length = (buffer.length () > MAXLOOKBEHIND ? - MAXLOOKBEHIND: buffer.length ()); - - if (error_state) - { - gripe_wrong_type_arg (nm.c_str (), args(0)); - return 0; - } +static void +parse_options (regexp::opts& options, const octave_value_list& args, + const std::string& who, int skip, bool& extra_args) +{ + int nargin = args.length (); - std::string pattern = args(1).string_value (); + extra_args = false; - if (error_state) - { - gripe_wrong_type_arg (nm.c_str (), args(1)); - return 0; - } - - for (int i = 2; i < nargin; i++) + for (int i = skip; i < nargin; i++) { std::string str = args(i).string_value (); if (error_state) { - error ("%s: optional arguments must be strings", nm.c_str ()); + error ("%s: optional arguments must be character strings", + who.c_str ()); break; } std::transform (str.begin (), str.end (), str.begin (), tolower); if (str.find ("once", 0) == 0) - { - once = true; - nopts--; - } + options.once (true); else if (str.find ("matchcase", 0) == 0) - { - case_insensitive = false; - nopts--; - } + options.case_insensitive (false); else if (str.find ("ignorecase", 0) == 0) - { - case_insensitive = true; - nopts--; - } + options.case_insensitive (true); else if (str.find ("dotall", 0) == 0) - { - dotexceptnewline = false; - nopts--; - } + options.dotexceptnewline (false); else if (str.find ("stringanchors", 0) == 0) - { - lineanchors = false; - nopts--; - } + options.lineanchors (false); else if (str.find ("literalspacing", 0) == 0) - { - freespacing = false; - nopts--; - } + options.freespacing (false); else if (str.find ("dotexceptnewline", 0) == 0) - { - dotexceptnewline = true; - nopts--; - } + options.dotexceptnewline (true); else if (str.find ("lineanchors", 0) == 0) - { - lineanchors = true; - nopts--; - } + options.lineanchors (true); else if (str.find ("freespacing", 0) == 0) - { - freespacing = true; - 
nopts--; - } - else if (str.find ("start", 0) && str.find ("end", 0) - && str.find ("tokenextents", 0) && str.find ("match", 0) - && str.find ("tokens", 0) && str.find ("names", 0) - && str.find ("split", 0)) - error ("%s: unrecognized option", nm.c_str ()); + options.freespacing (true); + else if (str.find ("start", 0) == 0 + || str.find ("end", 0) == 0 + || str.find ("tokenextents", 0) == 0 + || str.find ("match", 0) == 0 + || str.find ("tokens", 0) == 0 + || str.find ("names", 0) == 0 + || str.find ("split", 0) == 0) + extra_args = true; + else + error ("%s: unrecognized option", who.c_str ()); } - - if (!error_state) - { - Cell t; - std::string m; - double s, e; - - // named tokens "(?<name>...)" are only treated with PCRE not regex. - - size_t pos = 0; - size_t new_pos; - int nnames = 0; - int inames = 0; - std::ostringstream buf; - Array<int> named_idx; - - while ((new_pos = pattern.find ("(?", pos)) != std::string::npos) - { - if (pattern.at (new_pos + 2) == '<' - && !(pattern.at (new_pos + 3) == '=' - || pattern.at (new_pos + 3) == '!')) - { - // The syntax of named tokens in pcre is "(?P<name>...)" while - // we need a syntax "(?<name>...)", so fix that here. Also an - // expression like - // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)" - // should be perfectly legal, while pcre does not allow the same - // named token name on both sides of the alternative. Also fix - // that here by replacing name tokens by dummy names, and dealing - // with the dummy names later. - - size_t tmp_pos = pattern.find_first_of ('>', new_pos); - - if (tmp_pos == std::string::npos) - { - error ("regexp: syntax error in pattern"); - break; - } - - std::string tmp_name = - pattern.substr (new_pos+3, tmp_pos-new_pos-3); - - bool found = false; - - for (int i = 0; i < nnames; i++) - { - if (named(i) == tmp_name) - { - named_idx.resize (dim_vector (inames+1, 1)); - named_idx(inames) = i; - found = true; - break; - } - } - - if (! 
found) - { - named_idx.resize (dim_vector (inames+1, 1)); - named_idx(inames) = nnames; - named.append (tmp_name); - nnames++; - } - - if (new_pos - pos > 0) - buf << pattern.substr (pos, new_pos-pos); - if (inames < 10) - buf << "(?P<n00" << inames++; - else if (inames < 100) - buf << "(?P<n0" << inames++; - else - buf << "(?P<n" << inames++; - - pos = tmp_pos; - } - else if (pattern.at (new_pos + 2) == '<') - { - // Find lookbehind operators of arbitrary length (ie like - // "(?<=[a-z]*)") and replace with a maximum length operator - // as PCRE can not yet handle arbitrary length lookahead - // operators. Use the string length as the maximum length to - // avoid issues. - - int brackets = 1; - size_t tmp_pos1 = new_pos + 2; - size_t tmp_pos2 = tmp_pos1; - - while (tmp_pos1 <= pattern.length () && brackets > 0) - { - char ch = pattern.at (tmp_pos1); - - if (ch == '(') - brackets++; - else if (ch == ')') - { - if (brackets > 1) - tmp_pos2 = tmp_pos1; - - brackets--; - } - - tmp_pos1++; - } - - if (brackets != 0) - { - buf << pattern.substr (pos, new_pos - pos) << "(?"; - pos = new_pos + 2; - } - else - { - size_t tmp_pos3 = pattern.find_first_of ("*+", tmp_pos2); - - if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1) - { - if (!lookbehind_warned) - { - lookbehind_warned = true; - warning ("%s: arbitrary length lookbehind patterns are only supported up to length %d", - nm.c_str (), MAXLOOKBEHIND); - } - - buf << pattern.substr (pos, new_pos - pos) << "("; - - size_t i; - - if (pattern.at (tmp_pos3) == '*') - i = 0; - else - i = 1; - - for (; i < max_length + 1; i++) - { - buf << pattern.substr (new_pos, tmp_pos3 - new_pos) - << "{" << i << "}"; - buf << pattern.substr (tmp_pos3 + 1, - tmp_pos1 - tmp_pos3 - 1); - if (i != max_length) - buf << "|"; - } - buf << ")"; - } - else - buf << pattern.substr (pos, tmp_pos1 - pos); - - pos = tmp_pos1; - } - } - else - { - buf << pattern.substr (pos, new_pos - pos) << "(?"; - pos = new_pos + 2; - } - - } - - buf << 
pattern.substr (pos); - - if (error_state) - return 0; - - // Compile expression - const char *err; - int erroffset; - std::string buf_str = buf.str (); - - pcre *re = pcre_compile (buf_str.c_str (), - ((case_insensitive ? PCRE_CASELESS : 0) - | (dotexceptnewline ? 0 : PCRE_DOTALL) - | (lineanchors ? PCRE_MULTILINE : 0) - | (freespacing ? PCRE_EXTENDED : 0)), - &err, &erroffset, 0); - - if (re == 0) - { - error ("%s: %s at position %d of expression", nm.c_str (), - err, erroffset); - return 0; - } - - int subpatterns; - int namecount; - int nameentrysize; - char *nametable; - int idx = 0; - - pcre_fullinfo (re, 0, PCRE_INFO_CAPTURECOUNT, &subpatterns); - pcre_fullinfo (re, 0, PCRE_INFO_NAMECOUNT, &namecount); - pcre_fullinfo (re, 0, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); - pcre_fullinfo (re, 0, PCRE_INFO_NAMETABLE, &nametable); - - OCTAVE_LOCAL_BUFFER (int, ovector, (subpatterns+1)*3); - OCTAVE_LOCAL_BUFFER (int, nidx, namecount); - - for (int i = 0; i < namecount; i++) - { - // Index of subpattern in first two bytes MSB first of name. - // Extract index. - nidx[i] = (static_cast<int> (nametable[i*nameentrysize])) << 8 - | static_cast<int> (nametable[i*nameentrysize+1]); - } - - while (true) - { - OCTAVE_QUIT; - - int matches = pcre_exec (re, 0, buffer.c_str (), - buffer.length (), idx, - (idx ? PCRE_NOTBOL : 0), - ovector, (subpatterns+1)*3); - - if (matches == PCRE_ERROR_MATCHLIMIT) - { - // Try harder; start with default value for MATCH_LIMIT - // and increase it. - warning ("your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow"); - - pcre_extra pe; - - pcre_config (PCRE_CONFIG_MATCH_LIMIT, - static_cast <void *> (&pe.match_limit)); - - pe.flags = PCRE_EXTRA_MATCH_LIMIT; - - int i = 0; - while (matches == PCRE_ERROR_MATCHLIMIT - && i++ < PCRE_MATCHLIMIT_MAX) - { - OCTAVE_QUIT; - - pe.match_limit *= 10; - matches = pcre_exec (re, &pe, buffer.c_str (), - buffer.length (), idx, - (idx ? 
PCRE_NOTBOL : 0), - ovector, (subpatterns+1)*3); - } - } - - if (matches < 0 && matches != PCRE_ERROR_NOMATCH) - { - error ("%s: internal error calling pcre_exec; error code from pcre_exec is %i", - nm.c_str (), matches); - pcre_free (re); - return 0; - } - else if (matches == PCRE_ERROR_NOMATCH) - break; - else if (ovector[1] <= ovector[0]) - { - // Zero sized match. Skip to next char. - idx = ovector[0] + 1; - if (idx < buffer.length ()) - continue; - else - break; - } - else - { - int pos_match = 0; - Matrix te (matches-1, 2); - - for (int i = 1; i < matches; i++) - { - if (ovector[2*i] >= 0 && ovector[2*i+1] > 0 - && (i == 1 || ovector[2*i] != ovector[2*i-2] - || ovector[2*i-1] != ovector[2*i+1]) - && ovector[2*i] >= 0 && ovector[2*i+1] > 0) - { - te(pos_match,0) = double (ovector[2*i]+1); - te(pos_match++,1) = double (ovector[2*i+1]); - } - } - - te.resize (pos_match, 2); - - s = double (ovector[0]+1); - e = double (ovector[1]); - - const char **listptr; - int status = pcre_get_substring_list (buffer.c_str (), ovector, - matches, &listptr); - - if (status == PCRE_ERROR_NOMEMORY) - { - error ("%s: cannot allocate memory in pcre_get_substring_list", - nm.c_str ()); - pcre_free (re); - return 0; - } - - Cell cell_t (dim_vector (1, pos_match)); - string_vector named_tokens (nnames); - int pos_offset = 0; - pos_match = 0; - - for (int i = 1; i < matches; i++) - { - if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) - { - if (i == 1 || ovector[2*i] != ovector[2*i-2] - || ovector[2*i-1] != ovector[2*i+1]) - { - if (namecount > 0) - named_tokens(named_idx(i-pos_offset-1)) = - std::string (*(listptr+nidx[i-pos_offset-1])); - cell_t(pos_match++) = - std::string (*(listptr+i)); - } - else - pos_offset++; - } - } - - m = std::string (*listptr); - t = cell_t; - - pcre_free_substring_list (listptr); - - regexp_elem new_elem (named_tokens, t, m, te, s, e); - lst.push_back (new_elem); - idx = ovector[1]; - sz++; - - if (once || idx >= buffer.length ()) - break; - - } - } - - 
pcre_free (re); - } - - return sz; } static octave_value_list -octregexp (const octave_value_list &args, int nargout, const std::string &nm, - bool case_insensitive) +octregexp (const octave_value_list &args, int nargout, + const std::string &who, bool case_insensitive = false) { octave_value_list retval; + int nargin = args.length (); - std::list<regexp_elem> lst; - string_vector named; - int nopts; - bool once; + + // Make sure we have string, pattern + const std::string buffer = args(0).string_value (); + if (error_state) + return retval; + + const std::string pattern = args(1).string_value (); + if (error_state) + return retval; - int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts, once); + regexp::opts options; + options.case_insensitive (case_insensitive); + bool extra_options = false; + parse_options (options, args, who, 2, extra_options); + if (error_state) + return retval; + + regexp::match_data rx_lst = regexp_match (pattern, buffer, options, who); + + string_vector named_pats = rx_lst.named_patterns (); + + size_t sz = rx_lst.size (); if (! 
error_state) { @@ -532,47 +137,54 @@ if (sz == 1) { - for (int j = 0; j < named.length (); j++) - nmap.assign (named(j), lst.begin()->named_token (j)); + string_vector named_tokens = rx_lst.begin()->named_tokens (); + + for (int j = 0; j < named_pats.length (); j++) + nmap.assign (named_pats(j), named_tokens(j)); retval(5) = nmap; } else { - for (int j = 0; j < named.length (); j++) + for (int j = 0; j < named_pats.length (); j++) { Cell tmp (dim_vector (1, sz)); i = 0; - for (const_iterator p = lst.begin (); p != lst.end (); p++) - tmp(i++) = p->named_token (j); + for (regexp::match_data::const_iterator p = rx_lst.begin (); + p != rx_lst.end (); p++) + { + string_vector named_tokens = p->named_tokens (); - nmap.assign (named(j), octave_value (tmp)); + tmp(i++) = named_tokens(j); + } + + nmap.assign (named_pats(j), octave_value (tmp)); } retval(5) = nmap; } - std::string buffer = args(0).string_value (); - - if (once) + if (options.once ()) { - retval(4) = sz ? lst.front ().t : Cell (); - retval(3) = sz ? lst.front ().m : std::string (); - retval(2) = sz ? lst.front ().te : Matrix (); + regexp::match_data::const_iterator p = rx_lst.begin (); + + retval(4) = sz ? p->tokens () : Cell (); + retval(3) = sz ? p->match_string () : std::string (); + retval(2) = sz ? 
p->token_extents () : Matrix (); if (sz) { - double e = lst.front ().e; - double s = lst.front ().s; + double start = p->start (); + double end = p->end (); - Cell sp (dim_vector (1, 2)); - sp(0) = buffer.substr (0, s-1); - sp(1) = buffer.substr (e); + Cell split (dim_vector (1, 2)); + split(0) = buffer.substr (0, start-1); + split(1) = buffer.substr (end); - retval(6) = sp; - retval(1) = e; - retval(0) = s; + retval(6) = split; + retval(1) = end; + retval(0) = start; } else { @@ -583,39 +195,45 @@ } else { - Cell t (dim_vector (1, sz)); - Cell m (dim_vector (1, sz)); - Cell te (dim_vector (1, sz)); - NDArray e (dim_vector (1, sz)); - NDArray s (dim_vector (1, sz)); - Cell sp (dim_vector (1, sz+1)); + Cell tokens (dim_vector (1, sz)); + Cell match_string (dim_vector (1, sz)); + Cell token_extents (dim_vector (1, sz)); + NDArray end (dim_vector (1, sz)); + NDArray start (dim_vector (1, sz)); + Cell split (dim_vector (1, sz+1)); size_t sp_start = 0; i = 0; - for (const_iterator p = lst.begin (); p != lst.end (); p++) + for (regexp::match_data::const_iterator p = rx_lst.begin (); + p != rx_lst.end (); p++) { - t(i) = p->t; - m(i) = p->m; - te(i) = p->te; - e(i) = p->e; - s(i) = p->s; - sp(i) = buffer.substr (sp_start, p->s-sp_start-1); - sp_start = p->e; + double s = p->start (); + double e = p->end (); + + string_vector tmp = p->tokens (); + tokens(i) = Cell (dim_vector (1, tmp.length ()), tmp); + match_string(i) = p->match_string (); + token_extents(i) = p->token_extents (); + end(i) = e; + start(i) = s; + split(i) = buffer.substr (sp_start, s-sp_start-1); + sp_start = e; i++; } - sp(i) = buffer.substr (sp_start); + split(i) = buffer.substr (sp_start); - retval(6) = sp; - retval(4) = t; - retval(3) = m; - retval(2) = te; - retval(1) = e; - retval(0) = s; + retval(6) = split; + retval(4) = tokens; + retval(3) = match_string; + retval(2) = token_extents; + retval(1) = end; + retval(0) = start; } // Alter the order of the output arguments - if (nopts > 0) + + if 
(extra_options) { int n = 0; octave_value_list new_retval; @@ -682,7 +300,7 @@ static octave_value_list octcellregexp (const octave_value_list &args, int nargout, - const std::string &nm, bool case_insensitive) + const std::string &who, bool case_insensitive = false) { octave_value_list retval; @@ -705,7 +323,7 @@ for (octave_idx_type i = 0; i < cellstr.numel (); i++) { new_args(0) = cellstr(i); - octave_value_list tmp = octregexp (new_args, nargout, nm, + octave_value_list tmp = octregexp (new_args, nargout, who, case_insensitive); if (error_state) @@ -725,7 +343,7 @@ for (octave_idx_type i = 0; i < cellpat.numel (); i++) { new_args(1) = cellpat(i); - octave_value_list tmp = octregexp (new_args, nargout, nm, + octave_value_list tmp = octregexp (new_args, nargout, who, case_insensitive); if (error_state) @@ -739,7 +357,7 @@ { if (cellstr.dims () != cellpat.dims ()) - error ("%s: Inconsistent cell array dimensions", nm.c_str ()); + error ("%s: inconsistent cell array dimensions", who.c_str ()); else { for (int j = 0; j < nargout; j++) @@ -750,7 +368,7 @@ new_args(0) = cellstr(i); new_args(1) = cellpat(i); - octave_value_list tmp = octregexp (new_args, nargout, nm, + octave_value_list tmp = octregexp (new_args, nargout, who, case_insensitive); if (error_state) @@ -772,7 +390,7 @@ for (octave_idx_type i = 0; i < cellstr.numel (); i++) { new_args(0) = cellstr(i); - octave_value_list tmp = octregexp (new_args, nargout, nm, + octave_value_list tmp = octregexp (new_args, nargout, who, case_insensitive); if (error_state) @@ -799,7 +417,7 @@ for (octave_idx_type i = 0; i < cellpat.numel (); i++) { new_args(1) = cellpat(i); - octave_value_list tmp = octregexp (new_args, nargout, nm, + octave_value_list tmp = octregexp (new_args, nargout, who, case_insensitive); if (error_state) @@ -816,7 +434,7 @@ } } else - retval = octregexp (args, nargout, nm, case_insensitive); + retval = octregexp (args, nargout, who, case_insensitive); return retval; @@ -1022,9 +640,9 @@ if (nargin < 
2) print_usage (); else if (args(0).is_cell () || args(1).is_cell ()) - retval = octcellregexp (args, nargout, "regexp", false); + retval = octcellregexp (args, nargout, "regexp"); else - retval = octregexp (args, nargout, "regexp", false); + retval = octregexp (args, nargout, "regexp"); return retval; } @@ -1402,7 +1020,7 @@ static octave_value -octregexprep (const octave_value_list &args, const std::string &nm) +octregexprep (const octave_value_list &args, const std::string &who) { octave_value retval; @@ -1423,12 +1041,9 @@ // Pack options excluding 'tokenize' and various output // reordering strings into regexp arg list - octave_value_list regexpargs (nargin-1, octave_value ()); + octave_value_list regexpargs (nargin-3, octave_value ()); - regexpargs(0) = args (0); - regexpargs(1) = args (1); - - int len = 2; + int len = 0; for (int i = 3; i < nargin; i++) { const std::string opt = args(i).string_value (); @@ -1441,165 +1056,13 @@ } regexpargs.resize (len); - // Identify replacement tokens; build a vector of group numbers in - // the replacement string so that we can quickly calculate the size - // of the replacement. 
- int tokens = 0; - for (size_t i=1; i < replacement.size (); i++) - { - if (replacement[i-1]=='$' && isdigit (replacement[i])) - { - tokens++; - i++; - } - } - std::vector<int> token (tokens); - - int kk = 0; - for (size_t i = 1; i < replacement.size (); i++) - { - if (replacement[i-1]=='$' && isdigit (replacement[i])) - { - token[kk++] = replacement[i]-'0'; - i++; - } - } - - // Perform replacement - std::string rep; - - if (tokens > 0) - { - std::list<regexp_elem> lst; - string_vector named; - int nopts; - bool once; - int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts, once); - - if (error_state) - return retval; - if (sz == 0) - { - retval = args(0); - return retval; - } - - // Determine replacement length - const size_t replen = replacement.size () - 2*tokens; - int delta = 0; - const_iterator p = lst.begin (); - for (int i = 0; i < sz; i++) - { - OCTAVE_QUIT; - - const Matrix pairs (p->te); - size_t pairlen = 0; - for (int j = 0; j < tokens; j++) - { - if (token[j] == 0) - pairlen += static_cast<size_t> (p->e - p->s) + 1; - else if (token[j] <= pairs.rows ()) - pairlen += static_cast<size_t> (pairs(token[j]-1,1) - - pairs(token[j]-1,0)) + 1; - } - delta += static_cast<int> (replen + pairlen) - - static_cast<int> (p->e - p->s + 1); - p++; - } - - // Build replacement string - rep.reserve (buffer.size () + delta); - size_t from = 0; - p = lst.begin (); - for (int i = 0; i < sz; i++) - { - OCTAVE_QUIT; + regexp::opts options; + bool extra_args = false; + parse_options (options, regexpargs, who, 0, extra_args); + if (error_state) + return retval; - const Matrix pairs (p->te); - rep.append (&buffer[from], static_cast<size_t> (p->s - 1) - from); - from = static_cast<size_t> (p->e - 1) + 1; - for (size_t j = 1; j < replacement.size (); j++) - { - if (replacement[j-1]=='$' && isdigit (replacement[j])) - { - int k = replacement[j]-'0'; - if (k == 0) - { - // replace with entire match - rep.append (&buffer[static_cast<size_t> (p->e - 1)], - 
static_cast<size_t> (p->e - p->s) + 1); - } - else if (k <= pairs.rows ()) - { - // replace with group capture - rep.append (&buffer[static_cast<size_t> (pairs(k-1,0)-1)], - static_cast<size_t> (pairs(k-1,1) - - pairs(k-1,0)) + 1); - } - else - { - // replace with nothing - } - j++; - } - else - { - rep.append (1, replacement[j-1]); - } - if (j+1 == replacement.size ()) - { - rep.append (1, replacement[j]); - } - } - p++; - } - rep.append (&buffer[from], buffer.size () - from); - } - else - { - std::list<regexp_elem> lst; - string_vector named; - int nopts; - bool once; - int sz = octregexp_list (regexpargs, nm, false, lst, named, nopts, once); - - if (error_state) - return retval; - if (sz == 0) - { - retval = args (0); - return retval; - } - - // Determine replacement length - const size_t replen = replacement.size (); - int delta = 0; - const_iterator p = lst.begin (); - for (int i = 0; i < sz; i++) - { - OCTAVE_QUIT; - delta += static_cast<int> (replen) - - static_cast<int> (p->e - p->s + 1); - p++; - } - - // Build replacement string - rep.reserve (buffer.size () + delta); - size_t from = 0; - p = lst.begin (); - for (int i = 0; i < sz; i++) - { - OCTAVE_QUIT; - rep.append (&buffer[from], static_cast<size_t> (p->s - 1) - from); - from = static_cast<size_t> (p->e - 1) + 1; - rep.append (replacement); - p++; - } - rep.append (&buffer[from], buffer.size () - from); - } - - retval = rep; - return retval; + return regexp_replace (pattern, buffer, replacement, options, who); } DEFUN_DLD (regexprep, args, , @@ -1672,7 +1135,7 @@ { dv1 = pat.dims (); if (rep.numel () != 1 && dv1 != rep.dims ()) - error ("regexprep: Inconsistent cell array dimensions"); + error ("regexprep: inconsistent cell array dimensions"); } else if (rep.numel () != 1) dv1 = rep.dims ();