Mercurial > octave
diff src/DLD-FUNCTIONS/strfind.cc @ 10053:830986c43dee
implement compiled strrep
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Mon, 04 Jan 2010 12:05:23 +0100 |
parents | a14dc255427f |
children | 5e2b4b7967cc |
line wrap: on
line diff
--- a/src/DLD-FUNCTIONS/strfind.cc Mon Jan 04 03:00:19 2010 -0500 +++ b/src/DLD-FUNCTIONS/strfind.cc Mon Jan 04 12:05:23 2010 +0100 @@ -27,6 +27,7 @@ #include <string> #include <climits> #include <algorithm> +#include <deque> #include "Cell.h" #include "ov.h" @@ -54,34 +55,59 @@ } -static octave_value +static Array<octave_idx_type> qs_search (const Array<char>& needle, const Array<char>& haystack, - const octave_idx_type table[UCHAR_MAX]) + const octave_idx_type table[UCHAR_MAX], + bool overlaps = true) { const char *x = needle.data (); octave_idx_type m = needle.numel (); const char *y = haystack.data (); octave_idx_type n = haystack.numel (); - std::vector<octave_idx_type> accum; + // We'll use deque because it typically has the most favorable properties for + // the operation we need. + std::deque<octave_idx_type> accum; if (n >= m) { octave_idx_type j = 0; - while (j < n - m) { - if (std::equal (x, x + m, y + j)) - accum.push_back (j); - j += table[ORD(y[j + m])]; - } - if (std::equal (x, x + m, y + n - m)) - accum.push_back (n - m); + if (overlaps) + { + while (j < n - m) + { + if (std::equal (x, x + m, y + j)) + accum.push_back (j); + j += table[ORD(y[j + m])]; + } + } + else + { + while (j < n - m) + { + if (std::equal (x, x + m, y + j)) + { + accum.push_back (j); + j += m; + } + else + j += table[ORD(y[j + m])]; + } + } + + if (j == n - m && std::equal (x, x + m, y + j)) + accum.push_back (j); } octave_idx_type nmatch = accum.size (); - NoAlias<Matrix> result (std::min (1, nmatch), nmatch); - for (octave_idx_type i = 0; i < nmatch; i++) - result(i) = accum[i] + 1; + Array<octave_idx_type> result (dim_vector (std::min (1, nmatch), nmatch)); + octave_idx_type k = 0; + for (std::deque<octave_idx_type>::const_iterator iter = accum.begin (); + iter != accum.end (); iter++) + { + result.xelem (k++) = *iter; + } return result; } @@ -132,7 +158,8 @@ qs_preprocess (needle, table); if (argstr.is_string ()) - retval = qs_search (needle, argstr.char_array_value (), table); + retval = octave_value (qs_search (needle, argstr.char_array_value (), table), + true, true); else if (argstr.is_cell ()) { const Cell argsc = argstr.cell_value (); @@ -143,7 +170,8 @@ { octave_value argse = argsc(i); if (argse.is_string ()) - retc(i) = qs_search (needle, argse.char_array_value (), table); + retc(i) = octave_value (qs_search (needle, argse.char_array_value (), table), + true, true); else { error ("strfind: each cell element must be a string"); @@ -177,3 +205,85 @@ %!assert (strfind ("Linux _is_ user-friendly. It just isn't ignorant-friendly or idiot-friendly.", "friendly"), [17, 50, 68]); */ + +DEFUN_DLD (strrep, args, , + "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {} strrep (@var{s}, @var{x}, @var{y})\n\ +Replace all occurrences of the substring @var{x} of the string @var{s}\n\ +with the string @var{y} and return the result. For example,\n\ +\n\ +@example\n\ +@group\n\ +strrep (\"This is a test string\", \"is\", \"&%$\")\n\ + @result{} \"Th&%$ &%$ a test string\"\n\ +@end group\n\ +@end example\n\ +@seealso{regexprep, strfind, findstr}\n\ +@end deftypefn") +{ + octave_value retval; + int nargin = args.length (); + + if (nargin == 3) + { + octave_value argstr = args(0), argp = args(1), argr = args(2); + if (argp.is_string () && argp.is_string () && argr.is_string ()) + { + const Array<char> str = argstr.char_array_value (); + const Array<char> pat = argp.char_array_value (); + const Array<char> rep = argr.char_array_value (); + Array<char> ret = str; + + octave_idx_type siz = str.numel (), psiz = pat.numel (), rsiz = rep.numel (); + + if (psiz != 0) + { + OCTAVE_LOCAL_BUFFER (octave_idx_type, table, UCHAR_MAX); + qs_preprocess (pat, table); + + // Look up matches, without overlaps. + const Array<octave_idx_type> idx = qs_search (pat, str, table, false); + octave_idx_type nidx = idx.numel (); + + if (nidx) + { + // Compute result size. + octave_idx_type retsiz = siz + nidx * (rsiz - psiz); + ret.clear (dim_vector (1, retsiz)); + const char *src = str.data (), *reps = rep.data (); + char *dest = ret.fortran_vec (); + + octave_idx_type k = 0; + for (octave_idx_type i = 0; i < nidx; i++) + { + octave_idx_type j = idx(i); + dest = std::copy (src + k, src + j, dest); + dest = std::copy (reps, reps + rsiz, dest); + k = j + psiz; + } + + std::copy (src + k, src + siz, dest); + } + } + + retval = ret; + } + else + error ("strrep: all arguments must be strings"); + } + else + print_usage (); + + return retval; +} + +/* + +%!assert(strcmp (strrep ("This is a test string", "is", "&%$"), +%! "Th&%$ &%$ a test string")); + +%!error strrep (); + +%!error strrep ("foo", "bar", 3, 4); + +*/