diff src/DLD-FUNCTIONS/strfind.cc @ 10053:830986c43dee

implement compiled strrep
author Jaroslav Hajek <highegg@gmail.com>
date Mon, 04 Jan 2010 12:05:23 +0100
parents a14dc255427f
children 5e2b4b7967cc
line wrap: on
line diff
--- a/src/DLD-FUNCTIONS/strfind.cc	Mon Jan 04 03:00:19 2010 -0500
+++ b/src/DLD-FUNCTIONS/strfind.cc	Mon Jan 04 12:05:23 2010 +0100
@@ -27,6 +27,7 @@
 #include <string>
 #include <climits>
 #include <algorithm>
+#include <deque>
 
 #include "Cell.h"
 #include "ov.h"
@@ -54,34 +55,59 @@
 }
 
 
-static octave_value 
+static Array<octave_idx_type> 
 qs_search (const Array<char>& needle,
            const Array<char>& haystack,
-           const octave_idx_type table[UCHAR_MAX])
+           const octave_idx_type table[UCHAR_MAX],
+           bool overlaps = true)
 {
   const char *x = needle.data ();
   octave_idx_type m = needle.numel ();
   const char *y = haystack.data ();
   octave_idx_type n = haystack.numel ();
 
-  std::vector<octave_idx_type> accum;
+  // We'll use deque because it typically has the most favorable properties for
+  // the operation we need.
+  std::deque<octave_idx_type> accum;
   if (n >= m)
     {
       octave_idx_type j = 0;
-      while (j < n - m) {
-        if (std::equal (x, x + m, y + j))
-          accum.push_back (j);
-        j += table[ORD(y[j + m])];
-      }
 
-      if (std::equal (x, x + m, y + n - m))
-        accum.push_back (n - m);
+      if (overlaps)
+        {
+          while (j < n - m) 
+            {
+              if (std::equal (x, x + m, y + j))
+                accum.push_back (j);
+              j += table[ORD(y[j + m])];
+            }
+        }
+      else
+        {
+          while (j < n - m) 
+            {
+              if (std::equal (x, x + m, y + j))
+                {
+                  accum.push_back (j);
+                  j += m;
+                }
+              else
+                j += table[ORD(y[j + m])];
+            }
+        }
+
+      if (j == n - m && std::equal (x, x + m, y + j))
+        accum.push_back (j);
     }
 
   octave_idx_type nmatch = accum.size ();
-  NoAlias<Matrix> result (std::min (1, nmatch), nmatch);
-  for (octave_idx_type i = 0; i < nmatch; i++)
-    result(i) = accum[i] + 1;
+  Array<octave_idx_type> result (dim_vector (std::min (1, nmatch), nmatch));
+  octave_idx_type k = 0;
+  for (std::deque<octave_idx_type>::const_iterator iter = accum.begin (); 
+       iter != accum.end (); iter++)
+    {
+      result.xelem (k++) = *iter;
+    }
 
   return result;
 }
@@ -132,7 +158,8 @@
           qs_preprocess (needle, table);
 
           if (argstr.is_string ())
-            retval = qs_search (needle, argstr.char_array_value (), table);
+            retval = octave_value (qs_search (needle, argstr.char_array_value (), table), 
+                                   true, true);
           else if (argstr.is_cell ())
             {
               const Cell argsc = argstr.cell_value ();
@@ -143,7 +170,8 @@
                 {
                   octave_value argse = argsc(i);
                   if (argse.is_string ())
-                    retc(i) = qs_search (needle, argse.char_array_value (), table);
+                    retc(i) = octave_value (qs_search (needle, argse.char_array_value (), table), 
+                                            true, true);
                   else
                     {
                       error ("strfind: each cell element must be a string");
@@ -177,3 +205,85 @@
 %!assert (strfind ("Linux _is_ user-friendly. It just isn't ignorant-friendly or idiot-friendly.", "friendly"), [17, 50, 68]);
 
 */
+
+DEFUN_DLD (strrep, args, ,
+  "-*- texinfo -*-\n\
+@deftypefn {Loadable Function} {} strrep (@var{s}, @var{x}, @var{y})\n\
+Replace all occurrences of the substring @var{x} of the string @var{s}\n\
+with the string @var{y} and return the result.  For example,\n\
+\n\
+@example\n\
+@group\n\
+strrep (\"This is a test string\", \"is\", \"&%$\")\n\
+     @result{} \"Th&%$ &%$ a test string\"\n\
+@end group\n\
+@end example\n\
+@seealso{regexprep, strfind, findstr}\n\
+@end deftypefn")
+{
+  octave_value retval;
+  int nargin = args.length ();
+
+  if (nargin == 3)
+    {
+      octave_value argstr = args(0), argp = args(1), argr = args(2);
+      if (argp.is_string () && argp.is_string () && argr.is_string ())
+        {
+          const Array<char> str = argstr.char_array_value ();
+          const Array<char> pat = argp.char_array_value ();
+          const Array<char> rep = argr.char_array_value ();
+          Array<char> ret = str;
+
+          octave_idx_type siz = str.numel (), psiz = pat.numel (), rsiz = rep.numel ();
+
+          if (psiz != 0)
+            {
+              OCTAVE_LOCAL_BUFFER (octave_idx_type, table, UCHAR_MAX);
+              qs_preprocess (pat, table);
+
+              // Look up matches, without overlaps.
+              const Array<octave_idx_type> idx = qs_search (pat, str, table, false);
+              octave_idx_type nidx = idx.numel ();
+
+              if (nidx)
+                {
+                  // Compute result size.
+                  octave_idx_type retsiz = siz + nidx * (rsiz - psiz);
+                  ret.clear (dim_vector (1, retsiz));
+                  const char *src = str.data (), *reps = rep.data ();
+                  char *dest = ret.fortran_vec ();
+
+                  octave_idx_type k = 0;
+                  for (octave_idx_type i = 0; i < nidx; i++)
+                    {
+                      octave_idx_type j = idx(i);
+                      dest = std::copy (src + k, src + j, dest);
+                      dest = std::copy (reps, reps + rsiz, dest);
+                      k = j + psiz;
+                    }
+
+                  std::copy (src + k, src + siz, dest);
+                }
+            }
+
+          retval = ret;
+        }
+      else
+        error ("strrep: all arguments must be strings");
+    }
+  else
+    print_usage ();
+
+  return retval;
+}
+
+/*
+
+%!assert(strcmp (strrep ("This is a test string", "is", "&%$"),
+%! "Th&%$ &%$ a test string"));
+
+%!error strrep ();
+
+%!error strrep ("foo", "bar", 3, 4);
+
+*/