Mercurial > octave-nkf
comparison src/DLD-FUNCTIONS/strfind.cc @ 10022:a14dc255427f
omitted file in last patch
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Fri, 25 Dec 2009 21:26:16 +0100 |
parents | |
children | 830986c43dee |
comparison
equal
deleted
inserted
replaced
10021:bf26f81d009f | 10022:a14dc255427f |
---|---|
1 /* | |
2 | |
3 Copyright (C) 2009 Jaroslav Hajek | |
4 | |
5 This file is part of Octave. | |
6 | |
7 Octave is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
9 Free Software Foundation; either version 3 of the License, or (at your | |
10 option) any later version. | |
11 | |
12 Octave is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with Octave; see the file COPYING. If not, see | |
19 <http://www.gnu.org/licenses/>. | |
20 | |
21 */ | |
22 | |
23 #ifdef HAVE_CONFIG_H | |
24 #include <config.h> | |
25 #endif | |
26 | |
27 #include <string> | |
28 #include <climits> | |
29 #include <algorithm> | |
30 | |
31 #include "Cell.h" | |
32 #include "ov.h" | |
33 #include "defun-dld.h" | |
34 #include "unwind-prot.h" | |
35 #include "gripes.h" | |
36 #include "utils.h" | |
37 | |
38 // This allows safe indexing with char. In C++, char may be (and often is) signed! | |
39 #define ORD(ch) static_cast<unsigned char>(ch) | |
40 | |
41 // This is the quick search algorithm, as described at | |
42 // http://www-igm.univ-mlv.fr/~lecroq/string/node19.html | |
43 static void | |
44 qs_preprocess (const Array<char>& needle, | |
45 octave_idx_type table[UCHAR_MAX]) | |
46 { | |
47 const char *x = needle.data (); | |
48 octave_idx_type m = needle.numel (); | |
49 | |
50 for (octave_idx_type i = 0; i < UCHAR_MAX; i++) | |
51 table[i] = m + 1; | |
52 for (octave_idx_type i = 0; i < m; i++) | |
53 table[ORD(x[i])] = m - i; | |
54 } | |
55 | |
56 | |
57 static octave_value | |
58 qs_search (const Array<char>& needle, | |
59 const Array<char>& haystack, | |
60 const octave_idx_type table[UCHAR_MAX]) | |
61 { | |
62 const char *x = needle.data (); | |
63 octave_idx_type m = needle.numel (); | |
64 const char *y = haystack.data (); | |
65 octave_idx_type n = haystack.numel (); | |
66 | |
67 std::vector<octave_idx_type> accum; | |
68 if (n >= m) | |
69 { | |
70 octave_idx_type j = 0; | |
71 while (j < n - m) { | |
72 if (std::equal (x, x + m, y + j)) | |
73 accum.push_back (j); | |
74 j += table[ORD(y[j + m])]; | |
75 } | |
76 | |
77 if (std::equal (x, x + m, y + n - m)) | |
78 accum.push_back (n - m); | |
79 } | |
80 | |
81 octave_idx_type nmatch = accum.size (); | |
82 NoAlias<Matrix> result (std::min (1, nmatch), nmatch); | |
83 for (octave_idx_type i = 0; i < nmatch; i++) | |
84 result(i) = accum[i] + 1; | |
85 | |
86 return result; | |
87 } | |
88 | |
89 DEFUN_DLD (strfind, args, , | |
90 "-*- texinfo -*-\n\ | |
91 @deftypefn {Loadable Function} {@var{idx} =} strfind (@var{str}, @var{pattern})\n\ | |
92 @deftypefnx {Loadable Function} {@var{idx} =} strfind (@var{cellstr}, @var{pattern})\n\ | |
93 Search for @var{pattern} in the string @var{str} and return the\n\ | |
94 starting index of every such occurrence in the vector @var{idx}.\n\ | |
95 If there is no such occurrence, or if @var{pattern} is longer\n\ | |
96 than @var{str}, then @var{idx} is the empty array @code{[]}.\n\ | |
97 \n\ | |
98 If the cell array of strings @var{cellstr} is specified instead of the\n\ | |
99 string @var{str}, then @var{idx} is a cell array of vectors, as specified\n\ | |
100 above. Examples:\n\ | |
101 \n\ | |
102 @example\n\ | |
103 @group\n\ | |
104 strfind (\"abababa\", \"aba\")\n\ | |
105 @result{} [1, 3, 5]\n\ | |
106 \n\ | |
107 strfind (@{\"abababa\", \"bebebe\", \"ab\"@}, \"aba\")\n\ | |
108 @result{} ans =\n\ | |
109 @{\n\ | |
110 [1,1] =\n\ | |
111 \n\ | |
112 1 3 5\n\ | |
113 \n\ | |
114 [1,2] = [](1x0)\n\ | |
115 [1,3] = [](1x0)\n\ | |
116 @}\n\ | |
117 @end group\n\ | |
118 @end example\n\ | |
119 @seealso{findstr, strmatch, strcmp, strncmp, strcmpi, strncmpi, find}\n\ | |
120 @end deftypefn") | |
121 { | |
122 octave_value retval; | |
123 int nargin = args.length (); | |
124 | |
125 if (nargin == 2) | |
126 { | |
127 octave_value argstr = args(0), argp = args(1); | |
128 if (argp.is_string ()) | |
129 { | |
130 Array<char> needle = argp.char_array_value (); | |
131 OCTAVE_LOCAL_BUFFER (octave_idx_type, table, UCHAR_MAX); | |
132 qs_preprocess (needle, table); | |
133 | |
134 if (argstr.is_string ()) | |
135 retval = qs_search (needle, argstr.char_array_value (), table); | |
136 else if (argstr.is_cell ()) | |
137 { | |
138 const Cell argsc = argstr.cell_value (); | |
139 Cell retc (argsc.dims ()); | |
140 octave_idx_type ns = argsc.numel (); | |
141 | |
142 for (octave_idx_type i = 0; i < ns; i++) | |
143 { | |
144 octave_value argse = argsc(i); | |
145 if (argse.is_string ()) | |
146 retc(i) = qs_search (needle, argse.char_array_value (), table); | |
147 else | |
148 { | |
149 error ("strfind: each cell element must be a string"); | |
150 break; | |
151 } | |
152 } | |
153 | |
154 retval = retc; | |
155 } | |
156 else | |
157 error ("strfind: first argument must be a string or cell array of strings"); | |
158 } | |
159 else | |
160 error ("strfind: pattern must be a string"); | |
161 } | |
162 else | |
163 print_usage (); | |
164 | |
165 return retval; | |
166 } | |
167 | |
168 /* | |
169 | |
170 %!error strfind (); | |
171 %!error strfind ("foo", "bar", 1); | |
172 %!error strfind ("foo", 100); | |
173 %!error strfind (100, "foo"); | |
174 | |
175 %!assert (strfind ("abababa", "aba"), [1, 3, 5]); | |
176 %!assert (strfind ({"abababa", "bla", "bla"}, "a"), {[1, 3, 5, 7], 3, 3}); | |
177 %!assert (strfind ("Linux _is_ user-friendly. It just isn't ignorant-friendly or idiot-friendly.", "friendly"), [17, 50, 68]); | |
178 | |
179 */ |