comparison src/DLD-FUNCTIONS/strfind.cc @ 10022:a14dc255427f

omitted file in last patch
author Jaroslav Hajek <highegg@gmail.com>
date Fri, 25 Dec 2009 21:26:16 +0100
parents
children 830986c43dee
comparison
equal deleted inserted replaced
10021:bf26f81d009f 10022:a14dc255427f
1 /*
2
3 Copyright (C) 2009 Jaroslav Hajek
4
5 This file is part of Octave.
6
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11
12 Octave is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20
21 */
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26
27 #include <string>
28 #include <climits>
29 #include <algorithm>
30
31 #include "Cell.h"
32 #include "ov.h"
33 #include "defun-dld.h"
34 #include "unwind-prot.h"
35 #include "gripes.h"
36 #include "utils.h"
37
38 // This allows safe indexing with char. In C++, char may be (and often is) signed!
39 #define ORD(ch) static_cast<unsigned char>(ch)
40
41 // This is the quick search algorithm, as described at
42 // http://www-igm.univ-mlv.fr/~lecroq/string/node19.html
43 static void
44 qs_preprocess (const Array<char>& needle,
45 octave_idx_type table[UCHAR_MAX])
46 {
47 const char *x = needle.data ();
48 octave_idx_type m = needle.numel ();
49
50 for (octave_idx_type i = 0; i < UCHAR_MAX; i++)
51 table[i] = m + 1;
52 for (octave_idx_type i = 0; i < m; i++)
53 table[ORD(x[i])] = m - i;
54 }
55
56
57 static octave_value
58 qs_search (const Array<char>& needle,
59 const Array<char>& haystack,
60 const octave_idx_type table[UCHAR_MAX])
61 {
62 const char *x = needle.data ();
63 octave_idx_type m = needle.numel ();
64 const char *y = haystack.data ();
65 octave_idx_type n = haystack.numel ();
66
67 std::vector<octave_idx_type> accum;
68 if (n >= m)
69 {
70 octave_idx_type j = 0;
71 while (j < n - m) {
72 if (std::equal (x, x + m, y + j))
73 accum.push_back (j);
74 j += table[ORD(y[j + m])];
75 }
76
77 if (std::equal (x, x + m, y + n - m))
78 accum.push_back (n - m);
79 }
80
81 octave_idx_type nmatch = accum.size ();
82 NoAlias<Matrix> result (std::min (1, nmatch), nmatch);
83 for (octave_idx_type i = 0; i < nmatch; i++)
84 result(i) = accum[i] + 1;
85
86 return result;
87 }
88
89 DEFUN_DLD (strfind, args, ,
90 "-*- texinfo -*-\n\
91 @deftypefn {Loadable Function} {@var{idx} =} strfind (@var{str}, @var{pattern})\n\
92 @deftypefnx {Loadable Function} {@var{idx} =} strfind (@var{cellstr}, @var{pattern})\n\
93 Search for @var{pattern} in the string @var{str} and return the\n\
94 starting index of every such occurrence in the vector @var{idx}.\n\
95 If there is no such occurrence, or if @var{pattern} is longer\n\
96 than @var{str}, then @var{idx} is the empty array @code{[]}.\n\
97 \n\
98 If the cell array of strings @var{cellstr} is specified instead of the\n\
99 string @var{str}, then @var{idx} is a cell array of vectors, as specified\n\
100 above. Examples:\n\
101 \n\
102 @example\n\
103 @group\n\
104 strfind (\"abababa\", \"aba\")\n\
105 @result{} [1, 3, 5]\n\
106 \n\
107 strfind (@{\"abababa\", \"bebebe\", \"ab\"@}, \"aba\")\n\
108 @result{} ans =\n\
109 @{\n\
110 [1,1] =\n\
111 \n\
112 1 3 5\n\
113 \n\
114 [1,2] = [](1x0)\n\
115 [1,3] = [](1x0)\n\
116 @}\n\
117 @end group\n\
118 @end example\n\
119 @seealso{findstr, strmatch, strcmp, strncmp, strcmpi, strncmpi, find}\n\
120 @end deftypefn")
121 {
122 octave_value retval;
123 int nargin = args.length ();
124
125 if (nargin == 2)
126 {
127 octave_value argstr = args(0), argp = args(1);
128 if (argp.is_string ())
129 {
130 Array<char> needle = argp.char_array_value ();
131 OCTAVE_LOCAL_BUFFER (octave_idx_type, table, UCHAR_MAX);
132 qs_preprocess (needle, table);
133
134 if (argstr.is_string ())
135 retval = qs_search (needle, argstr.char_array_value (), table);
136 else if (argstr.is_cell ())
137 {
138 const Cell argsc = argstr.cell_value ();
139 Cell retc (argsc.dims ());
140 octave_idx_type ns = argsc.numel ();
141
142 for (octave_idx_type i = 0; i < ns; i++)
143 {
144 octave_value argse = argsc(i);
145 if (argse.is_string ())
146 retc(i) = qs_search (needle, argse.char_array_value (), table);
147 else
148 {
149 error ("strfind: each cell element must be a string");
150 break;
151 }
152 }
153
154 retval = retc;
155 }
156 else
157 error ("strfind: first argument must be a string or cell array of strings");
158 }
159 else
160 error ("strfind: pattern must be a string");
161 }
162 else
163 print_usage ();
164
165 return retval;
166 }
167
168 /*
169
170 %!error strfind ();
171 %!error strfind ("foo", "bar", 1);
172 %!error strfind ("foo", 100);
173 %!error strfind (100, "foo");
174
175 %!assert (strfind ("abababa", "aba"), [1, 3, 5]);
176 %!assert (strfind ({"abababa", "bla", "bla"}, "a"), {[1, 3, 5, 7], 3, 3});
177 %!assert (strfind ("Linux _is_ user-friendly. It just isn't ignorant-friendly or idiot-friendly.", "friendly"), [17, 50, 68]);
178
179 */