changeset 10053:830986c43dee

implement compiled strrep
author Jaroslav Hajek <highegg@gmail.com>
date Mon, 04 Jan 2010 12:05:23 +0100
parents 5813ec5077b5
children 8f7f325fa678
files scripts/ChangeLog scripts/strings/module.mk scripts/strings/strrep.m src/ChangeLog src/DLD-FUNCTIONS/strfind.cc
diffstat 5 files changed, 137 insertions(+), 123 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/ChangeLog	Mon Jan 04 03:00:19 2010 -0500
+++ b/scripts/ChangeLog	Mon Jan 04 12:05:23 2010 +0100
@@ -1,3 +1,8 @@
+2010-01-04  Jaroslav Hajek  <highegg@gmail.com>
+
+	* strings/strrep.m: Remove.
+	* strings/module.mk: Update.
+
 2010-01-02  Jaroslav Hajek  <highegg@gmail.com>
 
 	* optimization/fsolve.m: Support old style jacobian passing.
--- a/scripts/strings/module.mk	Mon Jan 04 03:00:19 2010 -0500
+++ b/scripts/strings/module.mk	Mon Jan 04 12:05:23 2010 +0100
@@ -26,7 +26,6 @@
   strings/strjust.m \
   strings/strmatch.m \
   strings/strncmpi.m \
-  strings/strrep.m \
   strings/strtok.m \
   strings/strtrim.m \
   strings/strtrunc.m \
--- a/scripts/strings/strrep.m	Mon Jan 04 03:00:19 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-## Copyright (C) 1995, 1996, 1998, 1999, 2000, 2005, 2006, 2007, 2008,
-##               2009 Kurt Hornik
-##
-## This file is part of Octave.
-##
-## Octave is free software; you can redistribute it and/or modify it
-## under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 3 of the License, or (at
-## your option) any later version.
-##
-## Octave is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Octave; see the file COPYING.  If not, see
-## <http://www.gnu.org/licenses/>.
-
-## -*- texinfo -*-
-## @deftypefn {Function File} {} strrep (@var{s}, @var{x}, @var{y})
-## Replace all occurrences of the substring @var{x} of the string @var{s}
-## with the string @var{y} and return the result.  For example,
-##
-## @example
-## @group
-## strrep ("This is a test string", "is", "&%$")
-##      @result{} "Th&%$ &%$ a test string"
-## @end group
-## @end example
-## @seealso{regexprep, strfind, findstr}
-## @end deftypefn
-
-## Author: Kurt Hornik <Kurt.Hornik@wu-wien.ac.at>
-## Created: 11 November 1994
-## Adapted-By: jwe
-
-function t = strrep (s, x, y)
-
-  if (nargin != 3)
-    print_usage ();
-  endif
-
-  if (! (ischar (s) && ischar (x) && ischar (y)))
-    error ("strrep: all arguments must be strings");
-  endif
-
-  if (length (x) > length (s) || isempty (x))
-    t = s;
-    return;
-  endif
-
-  ind = findstr (s, x, 0);
-
-  if (length(ind) == 0)
-    t = s;
-  elseif (length(y) > 0)
-    ## Replacement.
-    ##
-    ## Copy the parts of s that aren't being replaced.  This is done
-    ## with an index vector, with jumps where each search string
-    ## is found.  For a jump of 0 (target length == replacement length)
-    ## the index is just cumsum (ones (length (s))).  For non-zero
-    ## jumps, add the jump size to the ones vector at each found position.
-    jump = length(y) - length(x);
-    if (jump > 0)
-      ## S expands.
-      di = ones (size (s));
-      di(ind) = 1 + jump * ones (length (ind), 1);
-      t(cumsum (di)) = s;
-    elseif (jump < 0)
-      ## S contracts.
-      di = ones (jump * length (ind) + length (s), 1);
-      di (ind + jump * [0:length(ind)-1]) = 1 - jump * ones (length (ind), 1);
-      t = s (cumsum (di));
-    else
-      ## S stays the same length.
-      t = s;
-    endif
-    ## Now, substitute a copy of the replacement string whereever the
-    ## search string was found.  Note that we must first update the
-    ## target positions to account for any expansion or contraction
-    ## of s that may have occurred.
-    ind = ind + jump * [0:length(ind)-1];
-    repeat = [1:length(y)]' * ones (1, length (ind));
-    dest = ones (length (y), 1) * ind + repeat - 1;
-    t(dest) = y(repeat);
-  else
-    ## Deletion.
-    ##
-    ## Build an index vector of all locations where the target was found
-    ## in the search string, and zap them. 
-    t = toascii (s);
-    repeat = [1:length(x)]' * ones (1, length (ind));
-    delete = ones (length (x), 1) * ind + repeat - 1;
-    t(delete) = [];
-    t = char (t);
-  endif
-
-endfunction
-
-%!assert(strcmp (strrep ("This is a test string", "is", "&%$"),
-%! "Th&%$ &%$ a test string"));
-
-%!error strrep ();
-
-%!error strrep ("foo", "bar", 3, 4);
--- a/src/ChangeLog	Mon Jan 04 03:00:19 2010 -0500
+++ b/src/ChangeLog	Mon Jan 04 12:05:23 2010 +0100
@@ -1,3 +1,10 @@
+2010-01-04  Jaroslav Hajek  <highegg@gmail.com>
+
+	* DLD-FUNCTIONS/strfind.cc (qs_search): Optionally discard overlaps.
+	Return result as Array<octave_idx_type>.
+	(Fstrfind): Use octave_value (Array<octave_idx_type>) constructor.
+	(Fstrrep): New function.
+
 2010-01-04  John W. Eaton  <jwe@octave.org>
 
 	* lex.ll (can_be_command): New function.
--- a/src/DLD-FUNCTIONS/strfind.cc	Mon Jan 04 03:00:19 2010 -0500
+++ b/src/DLD-FUNCTIONS/strfind.cc	Mon Jan 04 12:05:23 2010 +0100
@@ -27,6 +27,7 @@
 #include <string>
 #include <climits>
 #include <algorithm>
+#include <deque>
 
 #include "Cell.h"
 #include "ov.h"
@@ -54,34 +55,59 @@
 }
 
 
-static octave_value 
+static Array<octave_idx_type> 
 qs_search (const Array<char>& needle,
            const Array<char>& haystack,
-           const octave_idx_type table[UCHAR_MAX])
+           const octave_idx_type table[UCHAR_MAX],
+           bool overlaps = true)
 {
   const char *x = needle.data ();
   octave_idx_type m = needle.numel ();
   const char *y = haystack.data ();
   octave_idx_type n = haystack.numel ();
 
-  std::vector<octave_idx_type> accum;
+  // We'll use deque because it typically has the most favorable properties for
+  // the operation we need.
+  std::deque<octave_idx_type> accum;
   if (n >= m)
     {
       octave_idx_type j = 0;
-      while (j < n - m) {
-        if (std::equal (x, x + m, y + j))
-          accum.push_back (j);
-        j += table[ORD(y[j + m])];
-      }
 
-      if (std::equal (x, x + m, y + n - m))
-        accum.push_back (n - m);
+      if (overlaps)
+        {
+          while (j < n - m) 
+            {
+              if (std::equal (x, x + m, y + j))
+                accum.push_back (j);
+              j += table[ORD(y[j + m])];
+            }
+        }
+      else
+        {
+          while (j < n - m) 
+            {
+              if (std::equal (x, x + m, y + j))
+                {
+                  accum.push_back (j);
+                  j += m;
+                }
+              else
+                j += table[ORD(y[j + m])];
+            }
+        }
+
+      if (j == n - m && std::equal (x, x + m, y + j))
+        accum.push_back (j);
     }
 
   octave_idx_type nmatch = accum.size ();
-  NoAlias<Matrix> result (std::min (1, nmatch), nmatch);
-  for (octave_idx_type i = 0; i < nmatch; i++)
-    result(i) = accum[i] + 1;
+  Array<octave_idx_type> result (dim_vector (std::min (1, nmatch), nmatch));
+  octave_idx_type k = 0;
+  for (std::deque<octave_idx_type>::const_iterator iter = accum.begin (); 
+       iter != accum.end (); iter++)
+    {
+      result.xelem (k++) = *iter;
+    }
 
   return result;
 }
@@ -132,7 +158,8 @@
           qs_preprocess (needle, table);
 
           if (argstr.is_string ())
-            retval = qs_search (needle, argstr.char_array_value (), table);
+            retval = octave_value (qs_search (needle, argstr.char_array_value (), table), 
+                                   true, true);
           else if (argstr.is_cell ())
             {
               const Cell argsc = argstr.cell_value ();
@@ -143,7 +170,8 @@
                 {
                   octave_value argse = argsc(i);
                   if (argse.is_string ())
-                    retc(i) = qs_search (needle, argse.char_array_value (), table);
+                    retc(i) = octave_value (qs_search (needle, argse.char_array_value (), table), 
+                                            true, true);
                   else
                     {
                       error ("strfind: each cell element must be a string");
@@ -177,3 +205,85 @@
 %!assert (strfind ("Linux _is_ user-friendly. It just isn't ignorant-friendly or idiot-friendly.", "friendly"), [17, 50, 68]);
 
 */
+
+DEFUN_DLD (strrep, args, ,
+  "-*- texinfo -*-\n\
+@deftypefn {Loadable Function} {} strrep (@var{s}, @var{x}, @var{y})\n\
+Replace all occurrences of the substring @var{x} of the string @var{s}\n\
+with the string @var{y} and return the result.  For example,\n\
+\n\
+@example\n\
+@group\n\
+strrep (\"This is a test string\", \"is\", \"&%$\")\n\
+     @result{} \"Th&%$ &%$ a test string\"\n\
+@end group\n\
+@end example\n\
+@seealso{regexprep, strfind, findstr}\n\
+@end deftypefn")
+{
+  octave_value retval;
+  int nargin = args.length ();
+
+  if (nargin == 3)
+    {
+      octave_value argstr = args(0), argp = args(1), argr = args(2);
+      if (argp.is_string () && argp.is_string () && argr.is_string ())
+        {
+          const Array<char> str = argstr.char_array_value ();
+          const Array<char> pat = argp.char_array_value ();
+          const Array<char> rep = argr.char_array_value ();
+          Array<char> ret = str;
+
+          octave_idx_type siz = str.numel (), psiz = pat.numel (), rsiz = rep.numel ();
+
+          if (psiz != 0)
+            {
+              OCTAVE_LOCAL_BUFFER (octave_idx_type, table, UCHAR_MAX);
+              qs_preprocess (pat, table);
+
+              // Look up matches, without overlaps.
+              const Array<octave_idx_type> idx = qs_search (pat, str, table, false);
+              octave_idx_type nidx = idx.numel ();
+
+              if (nidx)
+                {
+                  // Compute result size.
+                  octave_idx_type retsiz = siz + nidx * (rsiz - psiz);
+                  ret.clear (dim_vector (1, retsiz));
+                  const char *src = str.data (), *reps = rep.data ();
+                  char *dest = ret.fortran_vec ();
+
+                  octave_idx_type k = 0;
+                  for (octave_idx_type i = 0; i < nidx; i++)
+                    {
+                      octave_idx_type j = idx(i);
+                      dest = std::copy (src + k, src + j, dest);
+                      dest = std::copy (reps, reps + rsiz, dest);
+                      k = j + psiz;
+                    }
+
+                  std::copy (src + k, src + siz, dest);
+                }
+            }
+
+          retval = ret;
+        }
+      else
+        error ("strrep: all arguments must be strings");
+    }
+  else
+    print_usage ();
+
+  return retval;
+}
+
+/*
+
+%!assert(strcmp (strrep ("This is a test string", "is", "&%$"),
+%! "Th&%$ &%$ a test string"));
+
+%!error strrep ();
+
+%!error strrep ("foo", "bar", 3, 4);
+
+*/