Mercurial > octave-nkf
diff src/DLD-FUNCTIONS/regexp.cc @ 6361:776e657c9422
[project @ 2007-02-27 09:45:03 by dbateman]
author | dbateman |
---|---|
date | Tue, 27 Feb 2007 09:45:03 +0000 |
parents | f9ac7ebf0e19 |
children | fd09c7e8c4c9 |
line wrap: on
line diff
--- a/src/DLD-FUNCTIONS/regexp.cc Mon Feb 26 21:08:50 2007 +0000 +++ b/src/DLD-FUNCTIONS/regexp.cc Tue Feb 27 09:45:03 2007 +0000 @@ -93,12 +93,6 @@ nopts = nargin - 2; - if (nargin < 2) - { - print_usage (); - return 0; - } - std::string buffer = args(0).string_value (); if (error_state) { @@ -581,6 +575,144 @@ return retval; } +static octave_value_list +octcellregexp (const octave_value_list &args, int nargout, const std::string &nm, + bool case_insensitive) +{ + octave_value_list retval; + + if (args(0).is_cell()) + { + OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout); + octave_value_list new_args = args; + Cell cellstr = args(0).cell_value(); + if (args(1).is_cell()) + { + Cell cellpat = args(1).cell_value(); + + if (cellpat.numel() == 1) + { + for (int j = 0; j < nargout; j++) + newretval[j].resize(cellstr.dims()); + + new_args(1) = cellpat(0); + + for (octave_idx_type i = 0; i < cellstr.numel (); i++) + { + new_args(0) = cellstr(i); + octave_value_list tmp = octregexp (new_args, nargout, nm, + case_insensitive); + + if (error_state) + break; + + for (int j = 0; j < nargout; j++) + newretval[j](i) = tmp(j); + } + } + else if (cellstr.numel() == 1) + { + for (int j = 0; j < nargout; j++) + newretval[j].resize(cellpat.dims()); + + new_args(0) = cellstr(0); + + for (octave_idx_type i = 0; i < cellpat.numel (); i++) + { + new_args(1) = cellpat(i); + octave_value_list tmp = octregexp (new_args, nargout, nm, + case_insensitive); + + if (error_state) + break; + + for (int j = 0; j < nargout; j++) + newretval[j](i) = tmp(j); + } + } + else if (cellstr.numel() == cellpat.numel()) + { + + if (cellstr.dims() != cellpat.dims()) + error ("%s: Inconsistent cell array dimensions", nm.c_str()); + else + { + for (int j = 0; j < nargout; j++) + newretval[j].resize(cellstr.dims()); + + for (octave_idx_type i = 0; i < cellstr.numel (); i++) + { + new_args(0) = cellstr(i); + new_args(1) = cellpat(i); + + octave_value_list tmp = octregexp (new_args, nargout, nm, + case_insensitive); + + if (error_state) + break; + + for (int j = 0; j < nargout; j++) + newretval[j](i) = tmp(j); + } + } + } + else + error ("regexp: cell array arguments must be scalar or equal size"); + } + else + { + for (int j = 0; j < nargout; j++) + newretval[j].resize(cellstr.dims()); + + for (octave_idx_type i = 0; i < cellstr.numel (); i++) + { + new_args(0) = cellstr(i); + octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); + + if (error_state) + break; + + for (int j = 0; j < nargout; j++) + newretval[j](i) = tmp(j); + } + } + + if (!error_state) + for (int j = 0; j < nargout; j++) + retval(j) = octave_value (newretval[j]); + } + else if (args(1).is_cell()) + { + OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout); + octave_value_list new_args = args; + Cell cellpat = args(1).cell_value(); + + for (int j = 0; j < nargout; j++) + newretval[j].resize(cellpat.dims()); + + for (octave_idx_type i = 0; i < cellpat.numel (); i++) + { + new_args(1) = cellpat(i); + octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); + + if (error_state) + break; + + for (int j = 0; j < nargout; j++) + newretval[j](i) = tmp(j); + } + + if (!error_state) + for (int j = 0; j < nargout; j++) + retval(j) = octave_value (newretval[j]); + } + else + retval = octregexp (args, nargout, nm, case_insensitive); + + return retval; + +} + DEFUN_DLD (regexp, args, nargout, "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexp (@var{str}, @var{pat})\n\ @@ -713,7 +845,17 @@ @end table\n\ @end deftypefn") { - return octregexp (args, nargout, "regexp", false); + octave_value_list retval; + int nargin = args.length(); + + if (nargin < 2) + print_usage (); + else if (args(0).is_cell() || args(1).is_cell()) + retval = octcellregexp (args, nargout, "regexp", false); + else + retval = octregexp (args, nargout, "regexp", false); + + return retval; } /* @@ -876,6 +1018,11 @@ %!error regexp('string', 'tri', 'BadArg'); %!error regexp('string'); +%!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) +%!assert(regexp({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)}) +%!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]}) +%!assert(regexp('Strings',{'t','s'}),{2,7}) + */ DEFUN_DLD(regexpi, args, nargout, @@ -888,7 +1035,17 @@ if there are none. See @code{regexp} for more details\n\ @end deftypefn") { - return octregexp (args, nargout, "regexp", true); + octave_value_list retval; + int nargin = args.length(); + + if (nargin < 2) + print_usage (); + else if (args(0).is_cell() || args(1).is_cell()) + retval = octcellregexp (args, nargout, "regexpi", true); + else + retval = octregexp (args, nargout, "regexpi", true); + + return retval; } /* @@ -1035,62 +1192,20 @@ %!error regexpi('string', 'tri', 'BadArg'); %!error regexpi('string'); +%!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) +%!assert(regexpi({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)}) +%!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]}) +%!assert(regexpi('Strings',{'t','s'}),{2,[1,7]}) + */ -DEFUN_DLD(regexprep, args, , - "-*- texinfo -*-\n\ -@deftypefn {Function File} @var{string} = regexprep(@var{string}, @var{pat}, @var{repstr}, @var{options})\n\ -Replace matches of @var{pat} in @var{string} with @var{repstr}.\n\ -\n\ -\n\ -The replacement can contain @code{$i}, which subsubstitutes\n\ -for the ith set of parentheses in the match string. E.g.,\n\ -@example\n\ -\n\ - regexprep(\"Bill Dunn\",'(\\w+) (\\w+)','$2, $1')\n\ -\n\ -@end example\n\ -returns \"Dunn, Bill\"\n\ -\n\ -@var{options} may be zero or more of\n\ -@table @samp\n\ -\n\ -@item once\n\ -Replace only the first occurance of @var{pat} in the result.\n\ -\n\ -@item warnings\n\ -This option is present for compatibility but is ignored.\n\ -\n\ -@item ignorecase or matchcase\n\ -Ignore case for the pattern matching (see @code{regexpi}).\n\ -Alternatively, use (?i) or (?-i) in the pattern.\n\ -\n\ -@item lineanchors and stringanchors\n\ -Whether characters ^ and $ match the beginning and ending of lines.\n\ -Alternatively, use (?m) or (?-m) in the pattern.\n\ -\n\ -@item dotexceptnewline and dotall\n\ -Whether . matches newlines in the string.\n\ -Alternatively, use (?s) or (?-s) in the pattern.\n\ -\n\ -@item freespacing or literalspacing\n\ -Whether whitespace and # comments can be used to make the regular expression more readable.\n\ -Alternatively, use (?x) or (?-x) in the pattern.\n\ -\n\ -@end table\n\ -@seealso{regexp,regexpi}\n\ -@end deftypefn") + +static octave_value +octregexprep (const octave_value_list &args, const std::string &nm) { - octave_value_list retval; - + octave_value retval; int nargin = args.length(); - if (nargin < 3) - { - print_usage (); - return retval; - } - // Make sure we have string,pattern,replacement const std::string buffer = args(0).string_value (); if (error_state) return retval; @@ -1146,14 +1261,13 @@ std::list<regexp_elem> lst; string_vector named; int nopts; - int sz = octregexp_list (regexpargs, "regexprep", false, lst, named, - nopts); + int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts); if (error_state) return retval; if (sz == 0) { - retval(0) = args(0); + retval = args(0); return retval; } @@ -1233,14 +1347,13 @@ std::list<regexp_elem> lst; string_vector named; int nopts; - int sz = octregexp_list (regexpargs, "regexprep", false, lst, named, - nopts); + int sz = octregexp_list (regexpargs, nm, false, lst, named,nopts); if (error_state) return retval; if (sz == 0) { - retval(0) = args(0); + retval = args(0); return retval; } @@ -1271,7 +1384,135 @@ rep.append(&buffer[from],buffer.size()-from); } - retval(0) = rep; + retval = rep; + return retval; +} + +DEFUN_DLD(regexprep, args, , + "-*- texinfo -*-\n\ +@deftypefn {Function File} @var{string} = regexprep(@var{string}, @var{pat}, @var{repstr}, @var{options})\n\ +Replace matches of @var{pat} in @var{string} with @var{repstr}.\n\ +\n\ +\n\ +The replacement can contain @code{$i}, which subsubstitutes\n\ +for the ith set of parentheses in the match string. E.g.,\n\ +@example\n\ +\n\ + regexprep(\"Bill Dunn\",'(\\w+) (\\w+)','$2, $1')\n\ +\n\ +@end example\n\ +returns \"Dunn, Bill\"\n\ +\n\ +@var{options} may be zero or more of\n\ +@table @samp\n\ +\n\ +@item once\n\ +Replace only the first occurance of @var{pat} in the result.\n\ +\n\ +@item warnings\n\ +This option is present for compatibility but is ignored.\n\ +\n\ +@item ignorecase or matchcase\n\ +Ignore case for the pattern matching (see @code{regexpi}).\n\ +Alternatively, use (?i) or (?-i) in the pattern.\n\ +\n\ +@item lineanchors and stringanchors\n\ +Whether characters ^ and $ match the beginning and ending of lines.\n\ +Alternatively, use (?m) or (?-m) in the pattern.\n\ +\n\ +@item dotexceptnewline and dotall\n\ +Whether . matches newlines in the string.\n\ +Alternatively, use (?s) or (?-s) in the pattern.\n\ +\n\ +@item freespacing or literalspacing\n\ +Whether whitespace and # comments can be used to make the regular expression more readable.\n\ +Alternatively, use (?x) or (?-x) in the pattern.\n\ +\n\ +@end table\n\ +@seealso{regexp,regexpi}\n\ +@end deftypefn") +{ + octave_value_list retval; + int nargin = args.length(); + + if (nargin < 3) + { + print_usage (); + return retval; + } + + if (args(0).is_cell() || args(1).is_cell() || args(2).is_cell()) + { + Cell str; + Cell pat; + Cell rep; + dim_vector dv(1,1); + + if (args(0).is_cell()) + str = args(0).cell_value(); + else + str = Cell (args(0)); + + if (args(1).is_cell()) + pat = args(1).cell_value(); + else + pat = Cell (args(1)); + + if (args(2).is_cell()) + rep = args(2).cell_value(); + else + rep = Cell (args(2)); + + if (str.numel() != 1) + { + dv = str.dims(); + if ((pat.numel() != 1 && dv != pat.dims()) || + (rep.numel() != 1 && dv != rep.dims())) + error ("regexprep: Inconsistent cell array dimensions"); + } + else if (pat.numel() != 1) + { + dv = pat.dims(); + if ((pat.numel() != 1 && dv != pat.dims()) || + (rep.numel() != 1 && dv != rep.dims())) + error ("regexprep: Inconsistent cell array dimensions"); + } + else if (rep.numel() != 1) + dv = rep.dims(); + + if (!error_state) + { + Cell ret (dv); + octave_value_list new_args = args; + + if (str.numel() == 1) + new_args(0) = str(0); + if (pat.numel() == 1) + new_args(1) = pat(0); + if (rep.numel() == 1) + new_args(2) = rep(0); + + for (octave_idx_type i = 0; i < dv.numel(); i++) + { + if (str.numel() != 1) + new_args(0) = str(i); + if (pat.numel() != 1) + new_args(1) = pat(i); + if (rep.numel() != 1) + new_args(2) = rep(i); + ret(i) = octregexprep (new_args, "regexprep"); + + if (error_state) + break; + } + + if (!error_state) + retval = octave_value (ret); + } + } + else + retval = octregexprep (args, "regexprep"); + return retval; } @@ -1322,6 +1563,11 @@ %!assert(regexprep("abc","(b)","$1."),"ab.c"); %!assert(regexprep("abc","(b)","$1.."),"ab..c"); +## Test cell array arguments +%!assert(regexprep("abc",{"b","a"},"?"),{"a?c","?bc"}) +%!assert(regexprep({"abc","cba"},"b","?"),{"a?c","c?a"}) +%!assert(regexprep({"abc","cba"},{"b","a"},{"?","!"}),{"a?c","cb!"}) + */ /*