annotate scripts/strings/findstr.m @ 19957:e78c0514523d

restore strmatch function; backout changeset f9959972949a
author John W. Eaton <jwe@octave.org>
date Wed, 18 Mar 2015 10:20:26 -0400
parents 9fc020886ae9
children df437a52bcaf
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19697
4197fc428c7d maint: Update copyright notices for 2015.
John W. Eaton <jwe@octave.org>
parents: 19062
diff changeset
1 ## Copyright (C) 1996-2015 Kurt Hornik
2325
b5568c31ee2c [project @ 1996-07-15 22:20:21 by jwe]
jwe
parents: 2314
diff changeset
2 ##
2313
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
3 ## This file is part of Octave.
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
4 ##
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
5 ## Octave is free software; you can redistribute it and/or modify it
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
6 ## under the terms of the GNU General Public License as published by
7016
93c65f2a5668 [project @ 2007-10-12 06:40:56 by jwe]
jwe
parents: 6046
diff changeset
7 ## the Free Software Foundation; either version 3 of the License, or (at
93c65f2a5668 [project @ 2007-10-12 06:40:56 by jwe]
jwe
parents: 6046
diff changeset
8 ## your option) any later version.
2313
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
9 ##
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
10 ## Octave is distributed in the hope that it will be useful, but
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
13 ## General Public License for more details.
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
14 ##
5ca126254d15 [project @ 1996-07-11 21:25:22 by jwe]
jwe
parents: 2311
diff changeset
15 ## You should have received a copy of the GNU General Public License
7016
93c65f2a5668 [project @ 2007-10-12 06:40:56 by jwe]
jwe
parents: 6046
diff changeset
16 ## along with Octave; see the file COPYING. If not, see
93c65f2a5668 [project @ 2007-10-12 06:40:56 by jwe]
jwe
parents: 6046
diff changeset
17 ## <http://www.gnu.org/licenses/>.
2272
e97fba45f0a3 [project @ 1996-05-24 02:40:06 by jwe]
jwe
parents:
diff changeset
18
3361
4f40efa995c1 [project @ 1999-11-19 21:19:37 by jwe]
jwe
parents: 2355
diff changeset
19 ## -*- texinfo -*-
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
20 ## @deftypefn {Function File} {} findstr (@var{s}, @var{t})
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
21 ## @deftypefnx {Function File} {} findstr (@var{s}, @var{t}, @var{overlap})
3361
4f40efa995c1 [project @ 1999-11-19 21:19:37 by jwe]
jwe
parents: 2355
diff changeset
22 ## Return the vector of all positions in the longer of the two strings
4f40efa995c1 [project @ 1999-11-19 21:19:37 by jwe]
jwe
parents: 2355
diff changeset
23 ## @var{s} and @var{t} where an occurrence of the shorter of the two starts.
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
24 ## If the optional argument @var{overlap} is true, the returned vector
10821
693e22af08ae Grammarcheck documentation of m-files
Rik <octave@nomad.inbox5.com>
parents: 10549
diff changeset
25 ## can include overlapping positions (this is the default). For example:
3426
f8dde1807dee [project @ 2000-01-13 08:40:00 by jwe]
jwe
parents: 3361
diff changeset
26 ##
3361
4f40efa995c1 [project @ 1999-11-19 21:19:37 by jwe]
jwe
parents: 2355
diff changeset
27 ## @example
8442
502e58a0d44f Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents: 7411
diff changeset
28 ## @group
3361
4f40efa995c1 [project @ 1999-11-19 21:19:37 by jwe]
jwe
parents: 2355
diff changeset
29 ## findstr ("ababab", "a")
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
30 ## @result{} [1, 3, 5]
3361
4f40efa995c1 [project @ 1999-11-19 21:19:37 by jwe]
jwe
parents: 2355
diff changeset
31 ## findstr ("abababa", "aba", 0)
8442
502e58a0d44f Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents: 7411
diff changeset
32 ## @result{} [1, 5]
502e58a0d44f Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents: 7411
diff changeset
33 ## @end group
3361
4f40efa995c1 [project @ 1999-11-19 21:19:37 by jwe]
jwe
parents: 2355
diff changeset
34 ## @end example
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
35 ##
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
36 ## @strong{Caution:} @code{findstr} is scheduled for deprecation. Use
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
37 ## @code{strfind} in all new code.
19957
e78c0514523d restore strmatch function; backout changeset f9959972949a
John W. Eaton <jwe@octave.org>
parents: 19833
diff changeset
38 ## @seealso{strfind, strmatch, strcmp, strncmp, strcmpi, strncmpi, find}
3361
4f40efa995c1 [project @ 1999-11-19 21:19:37 by jwe]
jwe
parents: 2355
diff changeset
39 ## @end deftypefn
2272
e97fba45f0a3 [project @ 1996-05-24 02:40:06 by jwe]
jwe
parents:
diff changeset
40
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
41 ## Note that this implementation swaps the strings if the second one is longer
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
42 ## than the first, so try to put the longer one first.
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
43 ##
5428
2a16423e4aa0 [project @ 2005-08-23 18:38:27 by jwe]
jwe
parents: 5400
diff changeset
44 ## Author: Kurt Hornik <Kurt.Hornik@wu-wien.ac.at>
2355
c9f70d39255f [project @ 1996-08-20 23:30:54 by jwe]
jwe
parents: 2325
diff changeset
45 ## Adapted-By: jwe
2314
949ab8eba8bc [project @ 1996-07-12 03:58:02 by jwe]
jwe
parents: 2313
diff changeset
46
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
47 function v = findstr (s, t, overlap = true)
2275
38fea6d34daf [project @ 1996-05-24 04:06:52 by jwe]
jwe
parents: 2272
diff changeset
48
38fea6d34daf [project @ 1996-05-24 04:06:52 by jwe]
jwe
parents: 2272
diff changeset
49 if (nargin < 2 || nargin > 3)
6046
34f96dd5441b [project @ 2006-10-10 16:10:25 by jwe]
jwe
parents: 5428
diff changeset
50 print_usage ();
2275
38fea6d34daf [project @ 1996-05-24 04:06:52 by jwe]
jwe
parents: 2272
diff changeset
51 endif
38fea6d34daf [project @ 1996-05-24 04:06:52 by jwe]
jwe
parents: 2272
diff changeset
52
5348
b3ba123faec8 [project @ 2005-05-11 17:11:48 by jwe]
jwe
parents: 5307
diff changeset
53 if (all (size (s) > 1) || all (size (t) > 1))
b3ba123faec8 [project @ 2005-05-11 17:11:48 by jwe]
jwe
parents: 5307
diff changeset
54 error ("findstr: arguments must have only one non-singleton dimension");
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
55 endif
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
56
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
57 ## Make S be the longer string.
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
58 if (length (s) < length (t))
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
59 [s, t] = deal (t, s);
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
60 endif
11587
c792872f8942 all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents: 11523
diff changeset
61
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
62 l_s = length (s);
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
63 l_t = length (t);
11587
c792872f8942 all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents: 11523
diff changeset
64
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
65 if (l_t == 0)
4321
8460c03f3b4b [project @ 2003-02-14 01:08:38 by jwe]
jwe
parents: 3891
diff changeset
66 ## zero length target: return empty set
8460c03f3b4b [project @ 2003-02-14 01:08:38 by jwe]
jwe
parents: 3891
diff changeset
67 v = [];
11587
c792872f8942 all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents: 11523
diff changeset
68
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
69 elseif (l_t == 1)
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
70 ## length one target: simple find
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
71 v = find (s == t);
11587
c792872f8942 all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents: 11523
diff changeset
72
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
73 elseif (l_t == 2)
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
74 ## length two target: find first at i and second at i+1
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
75 v = find (s(1:l_s-1) == t(1) & s(2:l_s) == t(2));
11587
c792872f8942 all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents: 11523
diff changeset
76
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
77 else
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
78 ## length three or more: match the first three characters with find, then go through
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
79 ## the much smaller list to determine which of them are real matches
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
80 limit = l_s - l_t + 1;
19833
9fc020886ae9 maint: Clean up m-files to follow Octave coding conventions.
Rik <rik@octave.org>
parents: 19697
diff changeset
81 v = find ( s(1:limit) == t(1)
10549
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
82 & s(2:limit+1) == t(2)
19833
9fc020886ae9 maint: Clean up m-files to follow Octave coding conventions.
Rik <rik@octave.org>
parents: 19697
diff changeset
83 & s(3:limit+2) == t(3));
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
84 endif
3759
110bc441a954 [project @ 2000-12-16 01:25:12 by jwe]
jwe
parents: 3456
diff changeset
85
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
86 ## Need to search the index vector if our find was too short
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
87 ## (target length > 3), or if we don't allow overlaps. Note though
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
88 ## that there cannot be any overlaps if the first character in the
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
89 ## target is different from the remaining characters in the target,
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
90 ## so a single character, two different characters, or first character
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
91 ## different from the second two don't need to be searched.
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
92 if (l_t >= 3 || (! overlap && l_t > 1 && any (t(1) == t(2:l_t))))
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
93 ## force strings to be both row vectors or both column vectors
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
94 if (all (size (s) != size (t)))
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
95 t = t.';
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
96 endif
11587
c792872f8942 all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents: 11523
diff changeset
97
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
98 ## determine which ones to keep
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
99 keep = zeros (size (v));
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
100 ind = 0:l_t-1;
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
101 if (overlap)
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
102 for idx = 1:length (v)
10549
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
103 keep(idx) = all (s(v(idx) + ind) == t);
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
104 endfor
2272
e97fba45f0a3 [project @ 1996-05-24 02:40:06 by jwe]
jwe
parents:
diff changeset
105 else
8506
bc982528de11 comment style fixes
John W. Eaton <jwe@octave.org>
parents: 8442
diff changeset
106 ## First possible position for next non-overlapping match.
bc982528de11 comment style fixes
John W. Eaton <jwe@octave.org>
parents: 8442
diff changeset
107 next = 1;
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
108 for idx = 1:length (v)
10549
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
109 if (v(idx) >= next && s(v(idx) + ind) == t)
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
110 keep(idx) = 1;
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
111 ## Skip to the next possible match position.
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
112 next = v(idx) + l_t;
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
113 else
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
114 keep(idx) = 0;
95c3e38098bf Untabify .m scripts
Rik <code@nomad.inbox5.com>
parents: 8920
diff changeset
115 endif
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
116 endfor
2272
e97fba45f0a3 [project @ 1996-05-24 02:40:06 by jwe]
jwe
parents:
diff changeset
117 endif
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
118 if (! isempty (v))
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
119 v = v(find (keep));
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
120 endif
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
121 endif
5400
c7e3cf2fce3e [project @ 2005-07-05 15:01:32 by jwe]
jwe
parents: 5348
diff changeset
122
c7e3cf2fce3e [project @ 2005-07-05 15:01:32 by jwe]
jwe
parents: 5348
diff changeset
123 if (isempty (v))
c7e3cf2fce3e [project @ 2005-07-05 15:01:32 by jwe]
jwe
parents: 5348
diff changeset
124 v = [];
c7e3cf2fce3e [project @ 2005-07-05 15:01:32 by jwe]
jwe
parents: 5348
diff changeset
125 endif
c7e3cf2fce3e [project @ 2005-07-05 15:01:32 by jwe]
jwe
parents: 5348
diff changeset
126
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
127 ## Always return a row vector, because that's what the previous implementation did.
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
128 if (iscolumn (v))
3891
e2cbe8e31e06 [project @ 2002-04-04 23:38:33 by jwe]
jwe
parents: 3759
diff changeset
129 v = v.';
2272
e97fba45f0a3 [project @ 1996-05-24 02:40:06 by jwe]
jwe
parents:
diff changeset
130 endif
e97fba45f0a3 [project @ 1996-05-24 02:40:06 by jwe]
jwe
parents:
diff changeset
131
e97fba45f0a3 [project @ 1996-05-24 02:40:06 by jwe]
jwe
parents:
diff changeset
132 endfunction
7411
83a8781b529d [project @ 2008-01-22 21:52:25 by jwe]
jwe
parents: 7017
diff changeset
133
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
134
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
135 %!assert (findstr ("abababa", "a"), [1, 3, 5, 7])
14363
f3d52523cde1 Use Octave coding conventions in all m-file %!test blocks
Rik <octave@nomad.inbox5.com>
parents: 14138
diff changeset
136 %!assert (findstr ("abababa", "aba"), [1, 3, 5])
f3d52523cde1 Use Octave coding conventions in all m-file %!test blocks
Rik <octave@nomad.inbox5.com>
parents: 14138
diff changeset
137 %!assert (findstr ("aba", "abababa", 0), [1, 5])
7411
83a8781b529d [project @ 2008-01-22 21:52:25 by jwe]
jwe
parents: 7017
diff changeset
138
19833
9fc020886ae9 maint: Clean up m-files to follow Octave coding conventions.
Rik <rik@octave.org>
parents: 19697
diff changeset
139 ## Test input validation
13177
17b702fae303 findstr.m: Use more modern code practices in function.
Rik <octave@nomad.inbox5.com>
parents: 11587
diff changeset
140 %!error findstr ()
14363
f3d52523cde1 Use Octave coding conventions in all m-file %!test blocks
Rik <octave@nomad.inbox5.com>
parents: 14138
diff changeset
141 %!error findstr ("foo", "bar", 3, 4)
f3d52523cde1 Use Octave coding conventions in all m-file %!test blocks
Rik <octave@nomad.inbox5.com>
parents: 14138
diff changeset
142 %!error <must have only one non-singleton dimension> findstr (["AB" ; "CD"], "C")
7411
83a8781b529d [project @ 2008-01-22 21:52:25 by jwe]
jwe
parents: 7017
diff changeset
143