Mercurial > octave-nkf
annotate src/DLD-FUNCTIONS/regexp.cc @ 12434:357d593d87c1 release-3-4-x
regexp.cc: avoid deprecated Array<T>:resize function
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Thu, 10 Feb 2011 00:58:31 -0500 |
parents | 4ced6b90fffb |
children | e4dbfe3019b1 |
rev | line source |
---|---|
5582 | 1 /* |
2 | |
11523 | 3 Copyright (C) 2005-2011 David Bateman |
4 Copyright (C) 2002-2005 Paul Kienzle | |
7016 | 5 |
6 This file is part of Octave. | |
5582 | 7 |
8 Octave is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
7016 | 10 Free Software Foundation; either version 3 of the License, or (at your |
11 option) any later version. | |
5582 | 12 |
13 Octave is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
7016 | 19 along with Octave; see the file COPYING. If not, see |
20 <http://www.gnu.org/licenses/>. | |
5582 | 21 |
22 */ | |
23 | |
24 #ifdef HAVE_CONFIG_H | |
25 #include <config.h> | |
26 #endif | |
27 | |
5773 | 28 #include <algorithm> |
5765 | 29 #include <sstream> |
30 | |
5582 | 31 #include "defun-dld.h" |
32 #include "error.h" | |
33 #include "gripes.h" | |
34 #include "oct-obj.h" | |
35 #include "utils.h" | |
36 | |
37 #include "Cell.h" | |
38 #include "oct-map.h" | |
39 #include "str-vec.h" | |
5785 | 40 #include "quit.h" |
41 #include "parse.h" | |
8377
25bc2d31e1bf
improve OCTAVE_LOCAL_BUFFER
Jaroslav Hajek <highegg@gmail.com>
parents:
8140
diff
changeset
|
42 #include "oct-locbuf.h" |
5582 | 43 |
7173 | 44 #if defined (HAVE_PCRE) |
5582 | 45 #include <pcre.h> |
7117 | 46 #elif defined (HAVE_REGEX) |
47 #if defined (__MINGW32__) | |
5582 | 48 #define __restrict |
49 #endif | |
7237 | 50 #include <sys/types.h> |
5582 | 51 #include <regex.h> |
52 #endif | |
53 | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
54 // Define the maximum number of retries for a pattern that |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
55 // possibly results in an infinite recursion. |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
56 #define PCRE_MATCHLIMIT_MAX 10 |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
57 |
5785 | 58 // The regexp is constructed as a linked list to avoid resizing the |
59 // return values in arrays at each new match. | |
60 | |
61 // FIXME don't bother collecting and composing return values the user | |
62 // doesn't want. | |
63 | |
64 class regexp_elem | |
5582 | 65 { |
5785 | 66 public: |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
67 regexp_elem (const string_vector& _named_token, const Cell& _t, |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
68 const std::string& _m, const Matrix& _te, double _s, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
69 double _e) : |
5785 | 70 named_token (_named_token), t (_t), m (_m), te (_te), s (_s), e (_e) { } |
71 | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
72 regexp_elem (const regexp_elem &a) : named_token (a.named_token), t (a.t), |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
73 m (a.m), te (a.te), s (a.s), e (a.e) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
74 { } |
5785 | 75 |
76 string_vector named_token; | |
77 Cell t; | |
78 std::string m; | |
79 Matrix te; | |
80 double s; | |
81 double e; | |
82 }; | |
83 | |
84 typedef std::list<regexp_elem>::const_iterator const_iterator; | |
85 | |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
86 #define MAXLOOKBEHIND 10 |
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
87 static bool lookbehind_warned = false; |
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
88 |
5785 | 89 static int |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
90 octregexp_list (const octave_value_list &args, const std::string &nm, |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
91 bool case_insensitive, std::list<regexp_elem> &lst, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
92 string_vector &named, int &nopts, bool &once) |
5785 | 93 { |
94 int sz = 0; | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
95 #if defined (HAVE_REGEX) || defined (HAVE_PCRE) |
5582 | 96 int nargin = args.length(); |
5779 | 97 bool lineanchors = false; |
98 bool dotexceptnewline = false; | |
99 bool freespacing = false; | |
5582 | 100 |
5785 | 101 nopts = nargin - 2; |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
102 once = false; |
5785 | 103 |
5582 | 104 std::string buffer = args(0).string_value (); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
105 size_t max_length = (buffer.length () > MAXLOOKBEHIND ? |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
106 MAXLOOKBEHIND: buffer.length ()); |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
107 |
5582 | 108 if (error_state) |
109 { | |
110 gripe_wrong_type_arg (nm.c_str(), args(0)); | |
5785 | 111 return 0; |
5582 | 112 } |
113 | |
114 std::string pattern = args(1).string_value (); | |
115 if (error_state) | |
116 { | |
117 gripe_wrong_type_arg (nm.c_str(), args(1)); | |
5785 | 118 return 0; |
5582 | 119 } |
120 | |
121 for (int i = 2; i < nargin; i++) | |
122 { | |
123 std::string str = args(i).string_value(); | |
124 if (error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
125 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
126 error ("%s: optional arguments must be strings", nm.c_str()); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
127 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
128 } |
5582 | 129 std::transform (str.begin (), str.end (), str.begin (), tolower); |
130 if (str.find("once", 0) == 0) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
131 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
132 once = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
133 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
134 } |
5779 | 135 else if (str.find("matchcase", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
136 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
137 case_insensitive = false; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
138 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
139 } |
5779 | 140 else if (str.find("ignorecase", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
141 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
142 case_insensitive = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
143 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
144 } |
5785 | 145 else if (str.find("dotall", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
146 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
147 dotexceptnewline = false; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
148 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
149 } |
5785 | 150 else if (str.find("stringanchors", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
151 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
152 lineanchors = false; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
153 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
154 } |
5779 | 155 else if (str.find("literalspacing", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
156 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
157 freespacing = false; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
158 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
159 } |
5785 | 160 #if HAVE_PCRE |
161 // Only accept these options with pcre | |
162 else if (str.find("dotexceptnewline", 0) == 0) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
163 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
164 dotexceptnewline = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
165 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
166 } |
5785 | 167 else if (str.find("lineanchors", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
168 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
169 lineanchors = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
170 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
171 } |
5785 | 172 else if (str.find("freespacing", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
173 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
174 freespacing = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
175 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
176 } |
5582 | 177 else if (str.find("start", 0) && str.find("end", 0) && |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
178 str.find("tokenextents", 0) && str.find("match", 0) && |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
179 str.find("tokens", 0) && str.find("names", 0)) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
180 error ("%s: unrecognized option", nm.c_str()); |
5582 | 181 #else |
5779 | 182 else if (str.find("names", 0) == 0 || |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
183 str.find("dotexceptnewline", 0) == 0 || |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
184 str.find("lineanchors", 0) == 0 || |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
185 str.find("freespacing", 0) == 0) |
5785 | 186 error ("%s: %s not implemented in this version", str.c_str(), nm.c_str()); |
5582 | 187 else if (str.find("start", 0) && str.find("end", 0) && |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
188 str.find("tokenextents", 0) && str.find("match", 0) && |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
189 str.find("tokens", 0)) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
190 error ("%s: unrecognized option", nm.c_str()); |
5582 | 191 #endif |
192 } | |
193 | |
194 if (!error_state) | |
195 { | |
5785 | 196 Cell t; |
197 std::string m; | |
198 double s, e; | |
5582 | 199 |
200 // named tokens "(?<name>...)" are only treated with PCRE not regex. | |
201 #if HAVE_PCRE | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
202 |
5619 | 203 size_t pos = 0; |
204 size_t new_pos; | |
205 int nnames = 0; | |
206 int inames = 0; | |
5765 | 207 std::ostringstream buf; |
5619 | 208 Array<int> named_idx; |
5582 | 209 |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
210 while ((new_pos = pattern.find ("(?",pos)) != std::string::npos) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
211 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
212 if (pattern.at (new_pos + 2) == '<' && |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
213 !(pattern.at (new_pos + 3) == '=' || |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
214 pattern.at (new_pos + 3) == '!')) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
215 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
216 // The syntax of named tokens in pcre is "(?P<name>...)" while |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
217 // we need a syntax "(?<name>...)", so fix that here. Also an |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
218 // expression like |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
219 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)" |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
220 // should be perfectly legal, while pcre does not allow the same |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
221 // named token name on both sides of the alternative. Also fix |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
222 // that here by replacing name tokens by dummy names, and dealing |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
223 // with the dummy names later. |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
224 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
225 size_t tmp_pos = pattern.find_first_of ('>',new_pos); |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
226 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
227 if (tmp_pos == std::string::npos) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
228 { |
11553
01f703952eff
Improve docstrings for functions in DLD-FUNCTIONS directory.
Rik <octave@nomad.inbox5.com>
parents:
11523
diff
changeset
|
229 error ("regexp: syntax error in pattern"); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
230 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
231 } |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
232 |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
233 std::string tmp_name = |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
234 pattern.substr(new_pos+3,tmp_pos-new_pos-3); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
235 bool found = false; |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
236 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
237 for (int i = 0; i < nnames; i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
238 if (named(i) == tmp_name) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
239 { |
12434
357d593d87c1
regexp.cc: avoid deprecated Array<T>:resize function
John W. Eaton <jwe@octave.org>
parents:
11590
diff
changeset
|
240 named_idx.resize (dim_vector (inames+1, 1)); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
241 named_idx(inames) = i; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
242 found = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
243 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
244 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
245 if (! found) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
246 { |
12434
357d593d87c1
regexp.cc: avoid deprecated Array<T>:resize function
John W. Eaton <jwe@octave.org>
parents:
11590
diff
changeset
|
247 named_idx.resize (dim_vector (inames+1, 1)); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
248 named_idx(inames) = nnames; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
249 named.append(tmp_name); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
250 nnames++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
251 } |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
252 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
253 if (new_pos - pos > 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
254 buf << pattern.substr(pos,new_pos-pos); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
255 if (inames < 10) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
256 buf << "(?P<n00" << inames++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
257 else if (inames < 100) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
258 buf << "(?P<n0" << inames++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
259 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
260 buf << "(?P<n" << inames++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
261 pos = tmp_pos; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
262 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
263 else if (pattern.at (new_pos + 2) == '<') |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
264 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
265 // Find lookbehind operators of arbitrary length (ie like |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
266 // "(?<=[a-z]*)") and replace with a maximum length operator |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
267 // as PCRE can not yet handle arbitrary length lookahead |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
268 // operators. Use the string length as the maximum length to |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
269 // avoid issues. |
5582 | 270 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
271 int brackets = 1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
272 size_t tmp_pos1 = new_pos + 2; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
273 size_t tmp_pos2 = tmp_pos1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
274 while (tmp_pos1 <= pattern.length () && brackets > 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
275 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
276 char ch = pattern.at (tmp_pos1); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
277 if (ch == '(') |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
278 brackets++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
279 else if (ch == ')') |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
280 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
281 if (brackets > 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
282 tmp_pos2 = tmp_pos1; |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
283 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
284 brackets--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
285 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
286 tmp_pos1++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
287 } |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
288 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
289 if (brackets != 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
290 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
291 buf << pattern.substr (pos, new_pos - pos) << "(?"; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
292 pos = new_pos + 2; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
293 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
294 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
295 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
296 size_t tmp_pos3 = pattern.find_first_of ("*+", tmp_pos2); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
297 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
298 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
299 if (!lookbehind_warned) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
300 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
301 lookbehind_warned = true; |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
302 warning ("%s: arbitrary length lookbehind patterns are only supported up to length %d", nm.c_str(), MAXLOOKBEHIND); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
303 } |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
304 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
305 buf << pattern.substr (pos, new_pos - pos) << "("; |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
306 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
307 size_t i; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
308 if (pattern.at (tmp_pos3) == '*') |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
309 i = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
310 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
311 i = 1; |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
312 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
313 for (; i < max_length + 1; i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
314 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
315 buf << pattern.substr(new_pos, tmp_pos3 - new_pos) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
316 << "{" << i << "}"; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
317 buf << pattern.substr(tmp_pos3 + 1, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
318 tmp_pos1 - tmp_pos3 - 1); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
319 if (i != max_length) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
320 buf << "|"; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
321 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
322 buf << ")"; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
323 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
324 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
325 buf << pattern.substr (pos, tmp_pos1 - pos); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
326 pos = tmp_pos1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
327 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
328 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
329 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
330 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
331 buf << pattern.substr (pos, new_pos - pos) << "(?"; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
332 pos = new_pos + 2; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
333 } |
5619 | 334 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
335 } |
5619 | 336 |
5765 | 337 buf << pattern.substr(pos); |
5619 | 338 |
339 if (error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
340 return 0; |
5582 | 341 |
342 // Compile expression | |
343 pcre *re; | |
344 const char *err; | |
345 int erroffset; | |
5765 | 346 std::string buf_str = buf.str (); |
347 re = pcre_compile (buf_str.c_str (), | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
348 (case_insensitive ? PCRE_CASELESS : 0) | |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
349 (dotexceptnewline ? 0 : PCRE_DOTALL) | |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
350 (lineanchors ? PCRE_MULTILINE : 0) | |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
351 (freespacing ? PCRE_EXTENDED : 0), |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
352 &err, &erroffset, 0); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
353 |
10550 | 354 if (re == 0) |
355 { | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
356 error("%s: %s at position %d of expression", nm.c_str(), |
10550 | 357 err, erroffset); |
358 return 0; | |
359 } | |
5582 | 360 |
361 int subpatterns; | |
362 int namecount; | |
363 int nameentrysize; | |
364 char *nametable; | |
365 int idx = 0; | |
366 | |
7520 | 367 pcre_fullinfo(re, 0, PCRE_INFO_CAPTURECOUNT, &subpatterns); |
368 pcre_fullinfo(re, 0, PCRE_INFO_NAMECOUNT, &namecount); | |
369 pcre_fullinfo(re, 0, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); | |
370 pcre_fullinfo(re, 0, PCRE_INFO_NAMETABLE, &nametable); | |
5582 | 371 |
372 OCTAVE_LOCAL_BUFFER(int, ovector, (subpatterns+1)*3); | |
373 OCTAVE_LOCAL_BUFFER(int, nidx, namecount); | |
374 | |
375 for (int i = 0; i < namecount; i++) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
376 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
377 // Index of subpattern in first two bytes MSB first of name. |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
378 // Extract index. |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
379 nidx[i] = (static_cast<int>(nametable[i*nameentrysize])) << 8 | |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
380 static_cast<int>(nametable[i*nameentrysize+1]); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
381 } |
5582 | 382 |
383 while(true) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
384 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
385 OCTAVE_QUIT; |
5785 | 386 |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
387 int matches = pcre_exec(re, 0, buffer.c_str(), |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
388 buffer.length(), idx, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
389 (idx ? PCRE_NOTBOL : 0), |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
390 ovector, (subpatterns+1)*3); |
5582 | 391 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
392 if (matches == PCRE_ERROR_MATCHLIMIT) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
393 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
394 // try harder; start with default value for MATCH_LIMIT and increase it |
11590
4ced6b90fffb
style fixes for warning and error messages in source files
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
395 warning ("your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow"); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
396 pcre_extra pe; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
397 pcre_config(PCRE_CONFIG_MATCH_LIMIT, static_cast <void *> (&pe.match_limit)); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
398 pe.flags = PCRE_EXTRA_MATCH_LIMIT; |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
399 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
400 int i = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
401 while (matches == PCRE_ERROR_MATCHLIMIT && |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
402 i++ < PCRE_MATCHLIMIT_MAX) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
403 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
404 OCTAVE_QUIT; |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
405 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
406 pe.match_limit *= 10; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
407 matches = pcre_exec(re, &pe, buffer.c_str(), |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
408 buffer.length(), idx, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
409 (idx ? PCRE_NOTBOL : 0), |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
410 ovector, (subpatterns+1)*3); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
411 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
412 } |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
413 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
414 if (matches < 0 && matches != PCRE_ERROR_NOMATCH) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
415 { |
11590
4ced6b90fffb
style fixes for warning and error messages in source files
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
416 error ("%s: internal error calling pcre_exec; error code from pcre_exec is %i", |
4ced6b90fffb
style fixes for warning and error messages in source files
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
417 nm.c_str(), matches); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
418 pcre_free(re); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
419 return 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
420 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
421 else if (matches == PCRE_ERROR_NOMATCH) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
422 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
423 else if (ovector[1] <= ovector[0]) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
424 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
425 // FIXME: Zero sized match!! Is this the right thing to do? |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
426 idx = ovector[0] + 1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
427 continue; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
428 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
429 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
430 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
431 int pos_match = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
432 Matrix te(matches-1,2); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
433 for (int i = 1; i < matches; i++) |
10518
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
434 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
435 if (i == 1 || ovector[2*i] != ovector[2*i-2] |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
436 || ovector[2*i-1] != ovector[2*i+1]) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
437 { |
10518
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
438 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
439 { |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
440 te(pos_match,0) = double (ovector[2*i]+1); |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
441 te(pos_match++,1) = double (ovector[2*i+1]); |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
442 } |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
443 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
444 te.resize(pos_match,2); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
445 s = double (ovector[0]+1); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
446 e = double (ovector[1]); |
5582 | 447 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
448 const char **listptr; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
449 int status = pcre_get_substring_list(buffer.c_str(), ovector, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
450 matches, &listptr); |
5582 | 451 |
10550 | 452 if (status == PCRE_ERROR_NOMEMORY) |
453 { | |
454 error("%s: cannot allocate memory in pcre_get_substring_list", | |
455 nm.c_str()); | |
456 pcre_free(re); | |
457 return 0; | |
458 } | |
5582 | 459 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
460 Cell cell_t (dim_vector(1,pos_match)); |
10518
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
461 string_vector named_tokens(nnames); |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
462 int pos_offset = 0; |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
463 pos_match = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
464 for (int i = 1; i < matches; i++) |
10550 | 465 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) |
466 { | |
467 if (i == 1 || ovector[2*i] != ovector[2*i-2] | |
468 || ovector[2*i-1] != ovector[2*i+1]) | |
469 { | |
470 if (namecount > 0) | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
471 named_tokens(named_idx(i-pos_offset-1)) = |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
472 std::string(*(listptr+nidx[i-pos_offset-1])); |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
473 cell_t(pos_match++) = |
10550 | 474 std::string(*(listptr+i)); |
475 } | |
476 else | |
477 pos_offset++; | |
10546
f5b8b28917a2
Eliminate compile warning about explicit braces
Rik <code@nomad.inbox5.com>
parents:
10518
diff
changeset
|
478 } |
5582 | 479 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
480 m = std::string(*listptr); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
481 t = cell_t; |
5785 | 482 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
483 pcre_free_substring_list(listptr); |
5582 | 484 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
485 regexp_elem new_elem (named_tokens, t, m, te, s, e); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
486 lst.push_back (new_elem); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
487 idx = ovector[1]; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
488 sz++; |
5785 | 489 |
11002
2538d03489cc
avoid infinite loop in regexp searches with PCRE
John W. Eaton <jwe@octave.org>
parents:
10846
diff
changeset
|
490 if (once || idx >= buffer.length ()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
491 break; |
5582 | 492 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
493 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
494 } |
5582 | 495 |
496 pcre_free(re); | |
497 #else | |
498 regex_t compiled; | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
499 int err=regcomp(&compiled, pattern.c_str(), REG_EXTENDED | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
500 (case_insensitive ? REG_ICASE : 0)); |
5582 | 501 if (err) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
502 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
503 int len = regerror(err, &compiled, 0, 0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
504 OCTAVE_LOCAL_BUFFER (char, errmsg, len); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
505 regerror(err, &compiled, errmsg, len); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
506 error("%s: %s in pattern (%s)", nm.c_str(), errmsg, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
507 pattern.c_str()); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
508 regfree(&compiled); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
509 return 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
510 } |
5582 | 511 |
512 int subexpr = 1; | |
513 int idx = 0; | |
514 for (unsigned int i=0; i < pattern.length(); i++) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
515 subexpr += ( pattern[i] == '(' ? 1 : 0 ); |
5582 | 516 OCTAVE_LOCAL_BUFFER (regmatch_t, match, subexpr ); |
517 | |
518 while(true) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
519 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
520 OCTAVE_QUIT; |
5785 | 521 |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
522 if (regexec(&compiled, buffer.c_str() + idx, subexpr, |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
523 match, (idx ? REG_NOTBOL : 0)) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
524 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
525 // Count actual matches |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
526 int matches = 0; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
527 while (matches < subexpr && match[matches].rm_so >= 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
528 matches++; |
5582 | 529 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
530 if (matches == 0 || match[0].rm_eo == 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
531 break; |
8619
930a8114197b
For zero length matches in regexp, advance index by one and try again
David Bateman <dbateman@free.fr>
parents:
8477
diff
changeset
|
532 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
533 s = double (match[0].rm_so+1+idx); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
534 e = double (match[0].rm_eo+idx); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
535 Matrix te(matches-1,2); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
536 for (int i = 1; i < matches; i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
537 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
538 te(i-1,0) = double (match[i].rm_so+1+idx); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
539 te(i-1,1) = double (match[i].rm_eo+idx); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
540 } |
5582 | 541 |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
542 m = buffer.substr (match[0].rm_so+idx, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
543 match[0].rm_eo-match[0].rm_so); |
5582 | 544 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
545 Cell cell_t (dim_vector(1,matches-1)); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
546 for (int i = 1; i < matches; i++) |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
547 cell_t(i-1) = buffer.substr (match[i].rm_so+idx, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
548 match[i].rm_eo-match[i].rm_so); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
549 t = cell_t; |
5582 | 550 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
551 idx += match[0].rm_eo; |
5785 | 552 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
553 string_vector sv; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
554 regexp_elem new_elem (sv, t, m, te, s, e); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
555 lst.push_back (new_elem); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
556 sz++; |
5582 | 557 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
558 if (once) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
559 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
560 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
561 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
562 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
563 } |
5582 | 564 regfree(&compiled); |
565 #endif | |
5785 | 566 } |
567 #else | |
568 error ("%s: not available in this version of Octave", nm.c_str()); | |
569 #endif | |
570 return sz; | |
571 } | |
5582 | 572 |
5785 | 573 static octave_value_list |
574 octregexp (const octave_value_list &args, int nargout, const std::string &nm, | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
575 bool case_insensitive) |
5785 | 576 { |
577 octave_value_list retval; | |
578 int nargin = args.length(); | |
579 std::list<regexp_elem> lst; | |
580 string_vector named; | |
581 int nopts; | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
582 bool once; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
583 int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts, once); |
5785 | 584 |
585 if (! error_state) | |
586 { | |
587 // Converted the linked list in the correct form for the return values | |
588 | |
589 octave_idx_type i = 0; | |
590 #ifdef HAVE_PCRE | |
11045
cc3aad9dd3ef
dispatch.cc, fltk_backend.cc, regexp.cc: use octave_scalar_map instead of Octave_map
John W. Eaton <jwe@octave.org>
parents:
11032
diff
changeset
|
591 octave_scalar_map nmap; |
5785 | 592 if (sz == 1) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
593 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
594 for (int j = 0; j < named.length(); j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
595 nmap.assign (named(j), lst.begin()->named_token(j)); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
596 retval(5) = nmap; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
597 } |
5785 | 598 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
599 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
600 for (int j = 0; j < named.length (); j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
601 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
602 i = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
603 Cell tmp(dim_vector (1, sz)); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
604 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
605 tmp(i++) = p->named_token(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
606 nmap.assign (named(j), octave_value (tmp)); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
607 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
608 retval(5) = nmap; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
609 } |
5785 | 610 #else |
11045
cc3aad9dd3ef
dispatch.cc, fltk_backend.cc, regexp.cc: use octave_scalar_map instead of Octave_map
John W. Eaton <jwe@octave.org>
parents:
11032
diff
changeset
|
611 retval(5) = octave_scalar_map (); |
5785 | 612 #endif |
613 | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
614 if (once) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
615 retval(4) = sz ? lst.front ().t : Cell(); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
616 else |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
617 { |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
618 Cell t (dim_vector(1, sz)); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
619 i = 0; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
620 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
621 t(i++) = p->t; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
622 retval(4) = t; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
623 } |
5785 | 624 |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
625 if (once) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
626 retval(3) = sz ? lst.front ().m : std::string(); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
627 else |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
628 { |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
629 Cell m (dim_vector(1, sz)); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
630 i = 0; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
631 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
632 m(i++) = p->m; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
633 retval(3) = m; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
634 } |
5785 | 635 |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
636 if (once) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
637 retval(2) = sz ? lst.front ().te : Matrix(); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
638 else |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
639 { |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
640 Cell te (dim_vector(1, sz)); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
641 i = 0; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
642 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
643 te(i++) = p->te; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
644 retval(2) = te; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
645 } |
5785 | 646 |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
647 if (once) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
648 { |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
649 if (sz) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
650 retval(1) = lst.front ().e; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
651 else |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
652 retval(1) = Matrix(); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
653 } |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
654 else |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
655 { |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
656 NDArray e (dim_vector(1, sz)); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
657 i = 0; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
658 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
659 e(i++) = p->e; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
660 retval(1) = e; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
661 } |
5785 | 662 |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
663 if (once) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
664 { |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
665 if (sz) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
666 retval(0) = lst.front ().s; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
667 else |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
668 retval(0) = Matrix(); |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
669 } |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
670 else |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
671 { |
10551 | 672 NDArray s (dim_vector(1, sz)); |
673 | |
674 i = 0; | |
675 for (const_iterator p = lst.begin(); p != lst.end(); p++) | |
676 s(i++) = p->s; | |
677 retval(0) = s; | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
678 } |
5582 | 679 |
680 // Alter the order of the output arguments | |
681 if (nopts > 0) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
682 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
683 int n = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
684 octave_value_list new_retval; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
685 new_retval.resize(nargout); |
5582 | 686 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
687 OCTAVE_LOCAL_BUFFER (int, arg_used, 6); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
688 for (int j = 0; j < 6; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
689 arg_used[j] = false; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
690 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
691 for (int j = 2; j < nargin; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
692 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
693 int k = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
694 std::string str = args(j).string_value(); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
695 std::transform (str.begin (), str.end (), str.begin (), tolower); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
696 if (str.find("once", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
697 || str.find("stringanchors", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
698 || str.find("lineanchors", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
699 || str.find("matchcase", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
700 || str.find("ignorecase", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
701 || str.find("dotall", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
702 || str.find("dotexceptnewline", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
703 || str.find("literalspacing", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
704 || str.find("freespacing", 0) == 0 |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
705 ) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
706 continue; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
707 else if (str.find("start", 0) == 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
708 k = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
709 else if (str.find("end", 0) == 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
710 k = 1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
711 else if (str.find("tokenextents", 0) == 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
712 k = 2; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
713 else if (str.find("match", 0) == 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
714 k = 3; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
715 else if (str.find("tokens", 0) == 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
716 k = 4; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
717 else if (str.find("names", 0) == 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
718 k = 5; |
5582 | 719 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
720 new_retval(n++) = retval(k); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
721 arg_used[k] = true; |
5582 | 722 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
723 if (n == nargout) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
724 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
725 } |
5582 | 726 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
727 // Fill in the rest of the arguments |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
728 if (n < nargout) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
729 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
730 for (int j = 0; j < 6; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
731 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
732 if (! arg_used[j]) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
733 new_retval(n++) = retval(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
734 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
735 } |
5582 | 736 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
737 retval = new_retval; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
738 } |
5582 | 739 } |
740 | |
741 return retval; | |
742 } | |
743 | |
6361 | 744 static octave_value_list |
745 octcellregexp (const octave_value_list &args, int nargout, const std::string &nm, | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
746 bool case_insensitive) |
6361 | 747 { |
748 octave_value_list retval; | |
749 | |
750 if (args(0).is_cell()) | |
751 { | |
752 OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout); | |
753 octave_value_list new_args = args; | |
754 Cell cellstr = args(0).cell_value(); | |
755 if (args(1).is_cell()) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
756 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
757 Cell cellpat = args(1).cell_value(); |
6361 | 758 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
759 if (cellpat.numel() == 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
760 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
761 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
762 newretval[j].resize(cellstr.dims()); |
6361 | 763 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
764 new_args(1) = cellpat(0); |
6361 | 765 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
766 for (octave_idx_type i = 0; i < cellstr.numel (); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
767 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
768 new_args(0) = cellstr(i); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
769 octave_value_list tmp = octregexp (new_args, nargout, nm, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
770 case_insensitive); |
6361 | 771 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
772 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
773 break; |
6361 | 774 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
775 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
776 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
777 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
778 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
779 else if (cellstr.numel() == 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
780 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
781 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
782 newretval[j].resize(cellpat.dims()); |
6361 | 783 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
784 new_args(0) = cellstr(0); |
6361 | 785 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
786 for (octave_idx_type i = 0; i < cellpat.numel (); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
787 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
788 new_args(1) = cellpat(i); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
789 octave_value_list tmp = octregexp (new_args, nargout, nm, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
790 case_insensitive); |
6361 | 791 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
792 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
793 break; |
6361 | 794 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
795 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
796 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
797 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
798 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
799 else if (cellstr.numel() == cellpat.numel()) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
800 { |
6361 | 801 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
802 if (cellstr.dims() != cellpat.dims()) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
803 error ("%s: Inconsistent cell array dimensions", nm.c_str()); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
804 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
805 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
806 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
807 newretval[j].resize(cellstr.dims()); |
6361 | 808 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
809 for (octave_idx_type i = 0; i < cellstr.numel (); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
810 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
811 new_args(0) = cellstr(i); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
812 new_args(1) = cellpat(i); |
6361 | 813 |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
814 octave_value_list tmp = octregexp (new_args, nargout, nm, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
815 case_insensitive); |
6361 | 816 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
817 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
818 break; |
6361 | 819 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
820 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
821 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
822 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
823 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
824 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
825 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
826 error ("regexp: cell array arguments must be scalar or equal size"); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
827 } |
6361 | 828 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
829 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
830 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
831 newretval[j].resize(cellstr.dims()); |
6361 | 832 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
833 for (octave_idx_type i = 0; i < cellstr.numel (); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
834 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
835 new_args(0) = cellstr(i); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
836 octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); |
6361 | 837 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
838 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
839 break; |
6361 | 840 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
841 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
842 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
843 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
844 } |
6361 | 845 |
846 if (!error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
847 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
848 retval(j) = octave_value (newretval[j]); |
6361 | 849 } |
850 else if (args(1).is_cell()) | |
851 { | |
852 OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout); | |
853 octave_value_list new_args = args; | |
854 Cell cellpat = args(1).cell_value(); | |
855 | |
856 for (int j = 0; j < nargout; j++) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
857 newretval[j].resize(cellpat.dims()); |
6361 | 858 |
859 for (octave_idx_type i = 0; i < cellpat.numel (); i++) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
860 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
861 new_args(1) = cellpat(i); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
862 octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); |
6361 | 863 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
864 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
865 break; |
6361 | 866 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
867 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
868 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
869 } |
6361 | 870 |
871 if (!error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
872 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
873 retval(j) = octave_value (newretval[j]); |
6361 | 874 } |
875 else | |
876 retval = octregexp (args, nargout, nm, case_insensitive); | |
877 | |
878 return retval; | |
879 | |
880 } | |
881 | |
5582 | 882 DEFUN_DLD (regexp, args, nargout, |
883 "-*- texinfo -*-\n\ | |
10840 | 884 @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexp (@var{str}, @var{pat})\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
885 @deftypefnx {Loadable Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
886 Regular expression string matching. Search for @var{pat} in @var{str} and\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
887 return the positions and substrings of any matches, or empty values if there\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
888 are none. Note, some features and extended options are only available when\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
889 Octave is compiled with support for Perl Compatible Regular Expressions\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
890 (PCRE).\n\ |
5582 | 891 \n\ |
892 The matched pattern @var{pat} can include any of the standard regex\n\ | |
893 operators, including:\n\ | |
894 \n\ | |
895 @table @code\n\ | |
896 @item .\n\ | |
897 Match any character\n\ | |
10840 | 898 \n\ |
5582 | 899 @item * + ? @{@}\n\ |
900 Repetition operators, representing\n\ | |
901 @table @code\n\ | |
902 @item *\n\ | |
903 Match zero or more times\n\ | |
10840 | 904 \n\ |
5582 | 905 @item +\n\ |
906 Match one or more times\n\ | |
10840 | 907 \n\ |
5582 | 908 @item ?\n\ |
909 Match zero or one times\n\ | |
10840 | 910 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
911 @item @{@var{n}@}\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
912 Match exactly @var{n} times\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
913 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
914 @item @{@var{n},@}\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
915 Match @var{n} or more times\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
916 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
917 @item @{@var{m},@var{n}@}\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
918 Match between @var{m} and @var{n} times\n\ |
5582 | 919 @end table\n\ |
10840 | 920 \n\ |
5582 | 921 @item [@dots{}] [^@dots{}]\n\ |
10840 | 922 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
923 List operators. The pattern will match any character listed between \"[\"\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
924 and \"]\". If the first character is \"^\" then the pattern is inverted and\n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
925 any character except those listed between brackets will match.\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
926 \n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
927 With PCRE support, escape sequences defined below can be used inside list\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
928 operators. For example, a template for a floating point number might be\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
929 @code{[-+.\\d]+}. POSIX regular expressions do not use escape sequences\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
930 and any backslash @samp{\\} will be interpreted literally as one\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
931 of the list of characters to match.\n\ |
10840 | 932 \n\ |
5582 | 933 @item ()\n\ |
934 Grouping operator\n\ | |
10840 | 935 \n\ |
5582 | 936 @item |\n\ |
9036
58604c45ca74
Cleanup of data types related documentation
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
937 Alternation operator. Match one of a choice of regular expressions. The\n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
938 alternatives must be delimited by the grouping operator @code{()} above.\n\ |
10840 | 939 \n\ |
5582 | 940 @item ^ $\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
941 Anchoring operators. Requires pattern to occur at the start (@code{^}) or\n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
942 end (@code{$}) of the string.\n\ |
5582 | 943 @end table\n\ |
944 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
945 In addition, the following escaped characters have special meaning. Note,\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
946 it is recommended to quote @var{pat} in single quotes, rather than double\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
947 quotes, to avoid the escape sequences being interpreted by Octave before\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
948 being passed to @code{regexp}.\n\ |
5582 | 949 \n\ |
950 @table @code\n\ | |
951 @item \\b\n\ | |
952 Match a word boundary\n\ | |
10840 | 953 \n\ |
5582 | 954 @item \\B\n\ |
955 Match within a word\n\ | |
10840 | 956 \n\ |
5582 | 957 @item \\w\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
958 Match any word character\n\ |
10840 | 959 \n\ |
5582 | 960 @item \\W\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
961 Match any non-word character\n\ |
10840 | 962 \n\ |
5582 | 963 @item \\<\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
964 Match the beginning of a word\n\ |
10840 | 965 \n\ |
5582 | 966 @item \\>\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
967 Match the end of a word\n\ |
10840 | 968 \n\ |
5582 | 969 @item \\s\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
970 Match any whitespace character\n\ |
10840 | 971 \n\ |
5582 | 972 @item \\S\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
973 Match any non-whitespace character\n\ |
10840 | 974 \n\ |
5582 | 975 @item \\d\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
976 Match any digit\n\ |
10840 | 977 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
978 This sequence is only available with PCRE support. For POSIX regular\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
979 expressions use the following list operator @code{[0-9]}.\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
980 \n\ |
5582 | 981 @item \\D\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
982 Match any non-digit\n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
983 \n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
984 This sequence is only available with PCRE support. For POSIX regular\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
985 expressions use the following list operator @code{[^0-9]}.\n\ |
5582 | 986 @end table\n\ |
987 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
988 The outputs of @code{regexp} default to the order given below\n\ |
5582 | 989 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
990 @table @var\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
991 @item s\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
992 The start indices of each matching substring\n\ |
5582 | 993 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
994 @item e\n\ |
5582 | 995 The end indices of each matching substring\n\ |
996 \n\ | |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
997 @item te\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
998 The extents of each matched token surrounded by @code{(@dots{})} in\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
999 @var{pat}\n\ |
5582 | 1000 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
1001 @item m\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1002 A cell array of the text of each match\n\ |
5582 | 1003 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
1004 @item t\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1005 A cell array of the text of each token matched\n\ |
5582 | 1006 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
1007 @item nm\n\ |
5582 | 1008 A structure containing the text of each matched named token, with the name\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1009 being used as the fieldname. A named token is denoted by\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1010 @code{(?<name>@dots{})} and is only available with PCRE support.\n\ |
5582 | 1011 @end table\n\ |
1012 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1013 Particular output arguments, or the order of the output arguments, can be\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1014 selected by additional @var{opt} arguments. These are strings and the\n\ |
5582 | 1015 correspondence between the output arguments and the optional argument\n\ |
1016 are\n\ | |
1017 \n\ | |
1018 @multitable @columnfractions 0.2 0.3 0.3 0.2\n\ | |
1019 @item @tab 'start' @tab @var{s} @tab\n\ | |
1020 @item @tab 'end' @tab @var{e} @tab\n\ | |
1021 @item @tab 'tokenExtents' @tab @var{te} @tab\n\ | |
1022 @item @tab 'match' @tab @var{m} @tab\n\ | |
1023 @item @tab 'tokens' @tab @var{t} @tab\n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1024 @item @tab 'names' @tab @var{nm} @tab\n\ |
5582 | 1025 @end multitable\n\ |
1026 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1027 Additional arguments are summarized below.\n\ |
5779 | 1028 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1029 @table @samp\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1030 @item once\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1031 Return only the first occurrence of the pattern.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1032 \n\ |
5779 | 1033 @item matchcase\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1034 Make the matching case sensitive. (default)\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1035 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1036 Alternatively, use (?-i) in the pattern when PCRE is available.\n\ |
10840 | 1037 \n\ |
5779 | 1038 @item ignorecase\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1039 Ignore case when matching the pattern to the string.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1040 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1041 Alternatively, use (?i) in the pattern when PCRE is available.\n\ |
10840 | 1042 \n\ |
5779 | 1043 @item stringanchors\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1044 Match the anchor characters at the beginning and end of the string. \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1045 (default)\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1046 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1047 Alternatively, use (?-m) in the pattern when PCRE is available.\n\ |
10840 | 1048 \n\ |
5779 | 1049 @item lineanchors\n\ |
1050 Match the anchor characters at the beginning and end of the line.\n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1051 Only available when Octave is compiled with PCRE.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1052 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1053 Alternatively, use (?m) in the pattern when PCRE is available.\n\ |
10840 | 1054 \n\ |
5779 | 1055 @item dotall\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1056 The pattern @code{.} matches all characters including the newline character.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1057 (default)\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1058 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1059 Alternatively, use (?s) in the pattern when PCRE is available.\n\ |
10840 | 1060 \n\ |
5779 | 1061 @item dotexceptnewline\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1062 The pattern @code{.} matches all characters except the newline character.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1063 Only available when Octave is compiled with PCRE.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1064 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1065 Alternatively, use (?-s) in the pattern when PCRE is available.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1066 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1067 @item literalspacing\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1068 All characters in the pattern, including whitespace, are significant and are\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1069 used in pattern matching. (default)\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1070 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1071 Alternatively, use (?-x) in the pattern when PCRE is available.\n\ |
10840 | 1072 \n\ |
5779 | 1073 @item freespacing\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1074 The pattern may include arbitrary whitespace and also comments beginning with\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1075 the character @samp{#}.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1076 Only available when Octave is compiled with PCRE.\n\ |
10840 | 1077 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1078 Alternatively, use (?x) in the pattern when PCRE is available.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1079 \n\ |
5779 | 1080 @end table\n\ |
11572
7d6d8c1e471f
Grammarcheck Texinfo for files in src directory.
Rik <octave@nomad.inbox5.com>
parents:
11553
diff
changeset
|
1081 @seealso{regexpi, strfind, regexprep}\n\ |
5582 | 1082 @end deftypefn") |
1083 { | |
6361 | 1084 octave_value_list retval; |
1085 int nargin = args.length(); | |
1086 | |
1087 if (nargin < 2) | |
1088 print_usage (); | |
1089 else if (args(0).is_cell() || args(1).is_cell()) | |
1090 retval = octcellregexp (args, nargout, "regexp", false); | |
1091 else | |
1092 retval = octregexp (args, nargout, "regexp", false); | |
1093 | |
1094 return retval; | |
5582 | 1095 } |
1096 | |
1097 /* | |
1098 | |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1099 ## PCRE_ERROR_MATCHLIMIT test |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1100 %!test |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1101 %! s=sprintf('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-'); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1102 %! ws = warning("query"); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1103 %! unwind_protect |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1104 %! warning("off"); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1105 %! regexp(s, '(\s*-*\d+[.]*\d*\s*)+\n'); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1106 %! unwind_protect_cleanup |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1107 %! warning(ws); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1108 %! end_unwind_protect |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1109 |
5582 | 1110 ## seg-fault test |
1111 %!assert(regexp("abcde","."),[1,2,3,4,5]) | |
1112 | |
1113 ## Check that anchoring of pattern works correctly | |
1114 %!assert(regexp('abcabc','^abc'),1); | |
1115 %!assert(regexp('abcabc','abc$'),4); | |
5785 | 1116 %!assert(regexp('abcabc','^abc$'),zeros(1,0)); |
5582 | 1117 |
1118 %!test | |
1119 %! [s, e, te, m, t] = regexp(' No Match ', 'f(.*)uck'); | |
5785 | 1120 %! assert (s,zeros(1,0)) |
1121 %! assert (e,zeros(1,0)) | |
1122 %! assert (te,cell(1,0)) | |
1123 %! assert (m, cell(1,0)) | |
1124 %! assert (t, cell(1,0)) | |
5582 | 1125 |
1126 %!test | |
1127 %! [s, e, te, m, t] = regexp(' FiRetrUck ', 'f(.*)uck'); | |
5785 | 1128 %! assert (s,zeros(1,0)) |
1129 %! assert (e,zeros(1,0)) | |
1130 %! assert (te,cell(1,0)) | |
1131 %! assert (m, cell(1,0)) | |
1132 %! assert (t, cell(1,0)) | |
5582 | 1133 |
1134 %!test | |
1135 %! [s, e, te, m, t] = regexp(' firetruck ', 'f(.*)uck'); | |
1136 %! assert (s,2) | |
1137 %! assert (e,10) | |
1138 %! assert (te{1},[3,7]) | |
1139 %! assert (m{1}, 'firetruck') | |
1140 %! assert (t{1}{1}, 'iretr') | |
1141 | |
1142 %!test | |
1143 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*'); | |
1144 %! assert (s,[1,12]) | |
1145 %! assert (e,[5,17]) | |
1146 %! assert (size(te), [1,2]) | |
1147 %! assert (isempty(te{1})) | |
1148 %! assert (isempty(te{2})) | |
1149 %! assert (m{1},'short') | |
1150 %! assert (m{2},'string') | |
1151 %! assert (size(t), [1,2]) | |
1152 %! assert (isempty(t{1})) | |
1153 %! assert (isempty(t{2})) | |
1154 | |
1155 %!test | |
1156 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once'); | |
1157 %! assert (s,1) | |
1158 %! assert (e,5) | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1159 %! assert (isempty(te)) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1160 %! assert (m,'short') |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1161 %! assert (isempty(t)) |
5582 | 1162 |
1163 %!test | |
1164 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | |
1165 %! assert (s,1) | |
1166 %! assert (e,5) | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1167 %! assert (isempty(te)) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1168 %! assert (m,'short') |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1169 %! assert (isempty(t)) |
5582 | 1170 |
7242 | 1171 %!testif HAVE_PCRE |
1172 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); | |
1173 %! assert (s,1) | |
1174 %! assert (e,10) | |
1175 %! assert (size(te), [1,1]) | |
1176 %! assert (te{1}, [1 5; 7, 10]) | |
1177 %! assert (m{1},'short test') | |
1178 %! assert (size(t),[1,1]) | |
1179 %! assert (t{1}{1},'short') | |
1180 %! assert (t{1}{2},'test') | |
1181 %! assert (size(nm), [1,1]) | |
1182 %! assert (!isempty(fieldnames(nm))) | |
1183 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | |
1184 %! assert (nm.word1,'short') | |
1185 %! assert (nm.word2,'test') | |
5582 | 1186 |
7242 | 1187 %!testif HAVE_PCRE |
1188 %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | |
1189 %! assert (s,1) | |
1190 %! assert (e,10) | |
1191 %! assert (size(te), [1,1]) | |
1192 %! assert (te{1}, [1 5; 7, 10]) | |
1193 %! assert (m{1},'short test') | |
1194 %! assert (size(t),[1,1]) | |
1195 %! assert (t{1}{1},'short') | |
1196 %! assert (t{1}{2},'test') | |
1197 %! assert (size(nm), [1,1]) | |
1198 %! assert (!isempty(fieldnames(nm))) | |
1199 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | |
1200 %! assert (nm.word1,'short') | |
1201 %! assert (nm.word2,'test') | |
5619 | 1202 |
7242 | 1203 %!testif HAVE_PCRE |
1204 %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names'); | |
1205 %! assert (size(t), [1,2]); | |
1206 %! assert (t{1}{1},'John'); | |
1207 %! assert (t{1}{2},'Davis'); | |
1208 %! assert (t{2}{1},'Rogers'); | |
1209 %! assert (t{2}{2},'James'); | |
1210 %! assert (size(nm), [1,1]); | |
1211 %! assert (nm.first{1},'John'); | |
1212 %! assert (nm.first{2},'James'); | |
1213 %! assert (nm.last{1},'Davis'); | |
1214 %! assert (nm.last{2},'Rogers'); | |
5582 | 1215 |
10518
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
1216 %!testif HAVE_PCRE |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
1217 %! # Parenthesis in named token (ie (int)) causes a problem |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
1218 %! assert (regexp('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int')); |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
1219 |
5779 | 1220 %!assert(regexp("abc\nabc",'.'),[1:7]) |
1221 %!assert(regexp("abc\nabc",'.','dotall'),[1:7]) | |
7242 | 1222 %!testif HAVE_PCRE |
1223 %! assert(regexp("abc\nabc",'(?s).'),[1:7]) | |
1224 %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) | |
1225 %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) | |
5779 | 1226 |
1227 %!assert(regexp("caseCaSe",'case'),1) | |
1228 %!assert(regexp("caseCaSe",'case',"matchcase"),1) | |
1229 %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5]) | |
7242 | 1230 %!testif HAVE_PCRE |
1231 %! assert(regexp("caseCaSe",'(?-i)case'),1) | |
1232 %! assert(regexp("caseCaSe",'(?i)case'),[1,5]) | |
5779 | 1233 |
1234 %!assert (regexp("abc\nabc",'c$'),7) | |
1235 %!assert (regexp("abc\nabc",'c$',"stringanchors"),7) | |
7242 | 1236 %!testif HAVE_PCRE |
1237 %! assert (regexp("abc\nabc",'(?-m)c$'),7) | |
1238 %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7]) | |
1239 %! assert (regexp("abc\nabc",'(?m)c$'),[3,7]) | |
5779 | 1240 |
1241 %!assert (regexp("this word",'s w'),4) | |
1242 %!assert (regexp("this word",'s w','literalspacing'),4) | |
7242 | 1243 %!testif HAVE_PCRE |
1244 %! assert (regexp("this word",'(?-x)s w','literalspacing'),4) | |
1245 %! assert (regexp("this word",'s w','freespacing'),zeros(1,0)) | |
1246 %! assert (regexp("this word",'(?x)s w'),zeros(1,0)) | |
5779 | 1247 |
5582 | 1248 %!error regexp('string', 'tri', 'BadArg'); |
1249 %!error regexp('string'); | |
1250 | |
6361 | 1251 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) |
1252 %!assert(regexp({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)}) | |
1253 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]}) | |
1254 %!assert(regexp('Strings',{'t','s'}),{2,7}) | |
1255 | |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
1256 ## Test case for lookaround operators |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1257 %!testif HAVE_PCRE |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1258 %! assert(regexp('Iraq','q(?!u)'),4) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1259 %! assert(regexp('quit','q(?!u)'), zeros(1,0)) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1260 %! assert(regexp('quit','q(?=u)','match'), {'q'}) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1261 %! assert(regexp("quit",'q(?=u+)','match'), {'q'}) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1262 %! assert(regexp("qit",'q(?=u+)','match'), cell(1,0)) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1263 %! assert(regexp("qit",'q(?=u*)','match'), {'q'}) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1264 %! assert(regexp('thingamabob','(?<=a)b'), 9) |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
1265 |
5582 | 1266 */ |
1267 | |
6549 | 1268 DEFUN_DLD (regexpi, args, nargout, |
5582 | 1269 "-*- texinfo -*-\n\ |
10840 | 1270 @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexpi (@var{str}, @var{pat})\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1271 @deftypefnx {Loadable Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\ |
5582 | 1272 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1273 Case insensitive regular expression string matching. Search for @var{pat} in\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1274 @var{str} and return the positions and substrings of any matches, or empty\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1275 values if there are none. @xref{doc-regexp,,regexp}, for details on the\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1276 syntax of the search pattern.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1277 @seealso{regexp}\n\ |
5582 | 1278 @end deftypefn") |
1279 { | |
6361 | 1280 octave_value_list retval; |
1281 int nargin = args.length(); | |
1282 | |
1283 if (nargin < 2) | |
1284 print_usage (); | |
1285 else if (args(0).is_cell() || args(1).is_cell()) | |
1286 retval = octcellregexp (args, nargout, "regexpi", true); | |
1287 else | |
1288 retval = octregexp (args, nargout, "regexpi", true); | |
1289 | |
1290 return retval; | |
5582 | 1291 } |
1292 | |
1293 /* | |
1294 | |
1295 ## seg-fault test | |
1296 %!assert(regexpi("abcde","."),[1,2,3,4,5]) | |
1297 | |
1298 ## Check that anchoring of pattern works correctly | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1299 %!assert(regexpi('abcabc','^ABC'),1); |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1300 %!assert(regexpi('abcabc','ABC$'),4); |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1301 %!assert(regexpi('abcabc','^ABC$'),zeros(1,0)); |
5582 | 1302 |
1303 %!test | |
1304 %! [s, e, te, m, t] = regexpi(' No Match ', 'f(.*)uck'); | |
5785 | 1305 %! assert (s,zeros(1,0)) |
1306 %! assert (e,zeros(1,0)) | |
1307 %! assert (te,cell(1,0)) | |
1308 %! assert (m, cell(1,0)) | |
1309 %! assert (t, cell(1,0)) | |
5582 | 1310 |
1311 %!test | |
1312 %! [s, e, te, m, t] = regexpi(' FiRetrUck ', 'f(.*)uck'); | |
1313 %! assert (s,2) | |
1314 %! assert (e,10) | |
1315 %! assert (te{1},[3,7]) | |
1316 %! assert (m{1}, 'FiRetrUck') | |
1317 %! assert (t{1}{1}, 'iRetr') | |
1318 | |
1319 %!test | |
1320 %! [s, e, te, m, t] = regexpi(' firetruck ', 'f(.*)uck'); | |
1321 %! assert (s,2) | |
1322 %! assert (e,10) | |
1323 %! assert (te{1},[3,7]) | |
1324 %! assert (m{1}, 'firetruck') | |
1325 %! assert (t{1}{1}, 'iretr') | |
1326 | |
1327 %!test | |
1328 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*'); | |
1329 %! assert (s,[1,12]) | |
1330 %! assert (e,[5,17]) | |
1331 %! assert (size(te), [1,2]) | |
1332 %! assert (isempty(te{1})) | |
1333 %! assert (isempty(te{2})) | |
1334 %! assert (m{1},'ShoRt') | |
1335 %! assert (m{2},'String') | |
1336 %! assert (size(t), [1,2]) | |
1337 %! assert (isempty(t{1})) | |
1338 %! assert (isempty(t{2})) | |
1339 | |
1340 %!test | |
1341 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once'); | |
1342 %! assert (s,1) | |
1343 %! assert (e,5) | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1344 %! assert (isempty(te)) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1345 %! assert (m,'ShoRt') |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1346 %! assert (isempty(t)) |
5582 | 1347 |
1348 %!test | |
1349 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | |
1350 %! assert (s,1) | |
1351 %! assert (e,5) | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1352 %! assert (isempty(te)) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1353 %! assert (m,'ShoRt') |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1354 %! assert (isempty(t)) |
5582 | 1355 |
7242 | 1356 %!testif HAVE_PCRE |
1357 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); | |
1358 %! assert (s,1) | |
1359 %! assert (e,10) | |
1360 %! assert (size(te), [1,1]) | |
1361 %! assert (te{1}, [1 5; 7, 10]) | |
1362 %! assert (m{1},'ShoRt Test') | |
1363 %! assert (size(t),[1,1]) | |
1364 %! assert (t{1}{1},'ShoRt') | |
1365 %! assert (t{1}{2},'Test') | |
1366 %! assert (size(nm), [1,1]) | |
1367 %! assert (!isempty(fieldnames(nm))) | |
1368 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | |
1369 %! assert (nm.word1,'ShoRt') | |
1370 %! assert (nm.word2,'Test') | |
5582 | 1371 |
7242 | 1372 %!testif HAVE_PCRE |
1373 %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | |
1374 %! assert (s,1) | |
1375 %! assert (e,10) | |
1376 %! assert (size(te), [1,1]) | |
1377 %! assert (te{1}, [1 5; 7, 10]) | |
1378 %! assert (m{1},'ShoRt Test') | |
1379 %! assert (size(t),[1,1]) | |
1380 %! assert (t{1}{1},'ShoRt') | |
1381 %! assert (t{1}{2},'Test') | |
1382 %! assert (size(nm), [1,1]) | |
1383 %! assert (!isempty(fieldnames(nm))) | |
1384 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | |
1385 %! assert (nm.word1,'ShoRt') | |
1386 %! assert (nm.word2,'Test') | |
5582 | 1387 |
5779 | 1388 %!assert(regexpi("abc\nabc",'.'),[1:7]) |
1389 %!assert(regexpi("abc\nabc",'.','dotall'),[1:7]) | |
7242 | 1390 %!testif HAVE_PCRE |
1391 %! assert(regexpi("abc\nabc",'(?s).'),[1:7]) | |
1392 %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) | |
1393 %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) | |
5779 | 1394 |
1395 %!assert(regexpi("caseCaSe",'case'),[1,5]) | |
1396 %!assert(regexpi("caseCaSe",'case',"matchcase"),1) | |
1397 %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5]) | |
7242 | 1398 %!testif HAVE_PCRE |
1399 %! assert(regexpi("caseCaSe",'(?-i)case'),1) | |
1400 %! assert(regexpi("caseCaSe",'(?i)case'),[1,5]) | |
5779 | 1401 |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1402 %!assert (regexpi("abc\nabc",'C$'),7) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1403 %!assert (regexpi("abc\nabc",'C$',"stringanchors"),7) |
7242 | 1404 %!testif HAVE_PCRE |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1405 %! assert (regexpi("abc\nabc",'(?-m)C$'),7) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1406 %! assert (regexpi("abc\nabc",'C$',"lineanchors"),[3,7]) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1407 %! assert (regexpi("abc\nabc",'(?m)C$'),[3,7]) |
5779 | 1408 |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1409 %!assert (regexpi("this word",'S w'),4) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1410 %!assert (regexpi("this word",'S w','literalspacing'),4) |
7242 | 1411 %!testif HAVE_PCRE |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1412 %! assert (regexpi("this word",'(?-x)S w','literalspacing'),4) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1413 %! assert (regexpi("this word",'S w','freespacing'),zeros(1,0)) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1414 %! assert (regexpi("this word",'(?x)S w'),zeros(1,0)) |
5779 | 1415 |
5582 | 1416 %!error regexpi('string', 'tri', 'BadArg'); |
1417 %!error regexpi('string'); | |
1418 | |
6361 | 1419 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) |
1420 %!assert(regexpi({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)}) | |
1421 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]}) | |
1422 %!assert(regexpi('Strings',{'t','s'}),{2,[1,7]}) | |
1423 | |
5582 | 1424 */ |
1425 | |
6361 | 1426 |
1427 static octave_value | |
1428 octregexprep (const octave_value_list &args, const std::string &nm) | |
5785 | 1429 { |
6361 | 1430 octave_value retval; |
5785 | 1431 int nargin = args.length(); |
1432 | |
1433 // Make sure we have string,pattern,replacement | |
1434 const std::string buffer = args(0).string_value (); | |
1435 if (error_state) return retval; | |
1436 const std::string pattern = args(1).string_value (); | |
1437 if (error_state) return retval; | |
1438 const std::string replacement = args(2).string_value (); | |
1439 if (error_state) return retval; | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1440 |
5785 | 1441 // Pack options excluding 'tokenize' and various output |
1442 // reordering strings into regexp arg list | |
1443 octave_value_list regexpargs(nargin-1,octave_value()); | |
1444 regexpargs(0) = args(0); | |
1445 regexpargs(1) = args(1); | |
1446 int len=2; | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1447 for (int i = 3; i < nargin; i++) |
5785 | 1448 { |
1449 const std::string opt = args(i).string_value(); | |
1450 if (opt != "tokenize" && opt != "start" && opt != "end" | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1451 && opt != "tokenextents" && opt != "match" && opt != "tokens" |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1452 && opt != "names" && opt != "warnings") |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1453 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1454 regexpargs(len++) = args(i); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1455 } |
5785 | 1456 } |
1457 regexpargs.resize(len); | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1458 |
5785 | 1459 // Identify replacement tokens; build a vector of group numbers in |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1460 // the replacement string so that we can quickly calculate the size |
5785 | 1461 // of the replacement. |
1462 int tokens = 0; | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1463 for (size_t i=1; i < replacement.size(); i++) |
5785 | 1464 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1465 if (replacement[i-1]=='$' && isdigit(replacement[i])) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1466 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1467 tokens++, i++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1468 } |
5785 | 1469 } |
1470 std::vector<int> token(tokens); | |
1471 int kk = 0; | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1472 for (size_t i = 1; i < replacement.size(); i++) |
5785 | 1473 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1474 if (replacement[i-1]=='$' && isdigit(replacement[i])) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1475 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1476 token[kk++] = replacement[i]-'0'; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1477 i++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1478 } |
5785 | 1479 } |
1480 | |
1481 // Perform replacement | |
1482 std::string rep; | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1483 if (tokens > 0) |
5785 | 1484 { |
1485 std::list<regexp_elem> lst; | |
1486 string_vector named; | |
1487 int nopts; | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1488 bool once; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1489 int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts, once); |
5785 | 1490 |
1491 if (error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1492 return retval; |
5785 | 1493 if (sz == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1494 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1495 retval = args(0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1496 return retval; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1497 } |
5785 | 1498 |
1499 // Determine replacement length | |
1500 const size_t replen = replacement.size() - 2*tokens; | |
1501 int delta = 0; | |
1502 const_iterator p = lst.begin(); | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1503 for (int i = 0; i < sz; i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1504 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1505 OCTAVE_QUIT; |
5785 | 1506 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1507 const Matrix pairs(p->te); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1508 size_t pairlen = 0; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1509 for (int j = 0; j < tokens; j++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1510 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1511 if (token[j] == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1512 pairlen += static_cast<size_t>(p->e - p->s) + 1; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1513 else if (token[j] <= pairs.rows()) |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1514 pairlen += static_cast<size_t>(pairs(token[j]-1,1) - |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1515 pairs(token[j]-1,0)) + 1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1516 } |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1517 delta += static_cast<int>(replen + pairlen) - |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1518 static_cast<int>(p->e - p->s + 1); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1519 p++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1520 } |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1521 |
5785 | 1522 // Build replacement string |
1523 rep.reserve(buffer.size()+delta); | |
1524 size_t from = 0; | |
1525 p = lst.begin(); | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1526 for (int i=0; i < sz; i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1527 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1528 OCTAVE_QUIT; |
5785 | 1529 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1530 const Matrix pairs(p->te); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1531 rep.append(&buffer[from], static_cast<size_t>(p->s - 1) - from); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1532 from = static_cast<size_t>(p->e - 1) + 1; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1533 for (size_t j = 1; j < replacement.size(); j++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1534 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1535 if (replacement[j-1]=='$' && isdigit(replacement[j])) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1536 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1537 int k = replacement[j]-'0'; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1538 if (k == 0) |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1539 { |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1540 // replace with entire match |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1541 rep.append(&buffer[static_cast<size_t>(p->e - 1)], |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1542 static_cast<size_t>(p->e - p->s) + 1); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1543 } |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1544 else if (k <= pairs.rows()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1545 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1546 // replace with group capture |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1547 rep.append(&buffer[static_cast<size_t>(pairs(k-1,0)-1)], |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1548 static_cast<size_t>(pairs(k-1,1) - |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1549 pairs(k-1,0))+1); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1550 } |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1551 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1552 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1553 // replace with nothing |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1554 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1555 j++; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1556 } |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1557 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1558 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1559 rep.append(1,replacement[j-1]); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1560 } |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1561 if (j+1 == replacement.size()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1562 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1563 rep.append(1,replacement[j]); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1564 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1565 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1566 p++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1567 } |
5785 | 1568 rep.append(&buffer[from],buffer.size()-from); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1569 } |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1570 else |
5785 | 1571 { |
1572 std::list<regexp_elem> lst; | |
1573 string_vector named; | |
1574 int nopts; | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1575 bool once; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1576 int sz = octregexp_list (regexpargs, nm, false, lst, named, nopts, once); |
5785 | 1577 |
1578 if (error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1579 return retval; |
5785 | 1580 if (sz == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1581 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1582 retval = args(0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1583 return retval; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1584 } |
5785 | 1585 |
1586 // Determine replacement length | |
1587 const size_t replen = replacement.size(); | |
1588 int delta = 0; | |
1589 const_iterator p = lst.begin(); | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1590 for (int i = 0; i < sz; i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1591 { |
5785 | 1592 OCTAVE_QUIT; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1593 delta += static_cast<int>(replen) - |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1594 static_cast<int>(p->e - p->s + 1); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1595 p++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1596 } |
5785 | 1597 |
1598 // Build replacement string | |
1599 rep.reserve(buffer.size()+delta); | |
1600 size_t from = 0; | |
1601 p = lst.begin(); | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1602 for (int i=0; i < sz; i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1603 { |
5785 | 1604 OCTAVE_QUIT; |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1605 rep.append(&buffer[from], static_cast<size_t>(p->s - 1) - from); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1606 from = static_cast<size_t>(p->e - 1) + 1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1607 rep.append(replacement); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1608 p++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1609 } |
5785 | 1610 rep.append(&buffer[from],buffer.size()-from); |
1611 } | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1612 |
6361 | 1613 retval = rep; |
1614 return retval; | |
1615 } | |
1616 | |
6549 | 1617 DEFUN_DLD (regexprep, args, , |
6361 | 1618 "-*- texinfo -*-\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1619 @deftypefn {Loadable Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1620 @deftypefnx {Loadable Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, \"@var{opt1}\", @dots{})\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1621 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.\n\ |
6361 | 1622 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1623 The pattern is a regular expression as documented for @code{regexp}.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1624 @xref{doc-regexp,,regexp}.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1625 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1626 The replacement string may contain @code{$i}, which substitutes\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1627 for the ith set of parentheses in the match string. For example,\n\ |
10840 | 1628 \n\ |
6361 | 1629 @example\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1630 regexprep(\"Bill Dunn\",'(\\w+) (\\w+)','$2, $1')\n\ |
6361 | 1631 @end example\n\ |
10840 | 1632 \n\ |
10846
a4f482e66b65
Grammarcheck more of the documentation.
Rik <octave@nomad.inbox5.com>
parents:
10840
diff
changeset
|
1633 @noindent\n\ |
6361 | 1634 returns \"Dunn, Bill\"\n\ |
1635 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1636 Options in addition to those of @code{regexp} are\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1637 \n\ |
6361 | 1638 @table @samp\n\ |
1639 \n\ | |
1640 @item once\n\ | |
7001 | 1641 Replace only the first occurrence of @var{pat} in the result.\n\ |
6361 | 1642 \n\ |
1643 @item warnings\n\ | |
1644 This option is present for compatibility but is ignored.\n\ | |
1645 \n\ | |
1646 @end table\n\ | |
11572
7d6d8c1e471f
Grammarcheck Texinfo for files in src directory.
Rik <octave@nomad.inbox5.com>
parents:
11553
diff
changeset
|
1647 @seealso{regexp, regexpi, strrep}\n\ |
6361 | 1648 @end deftypefn") |
1649 { | |
1650 octave_value_list retval; | |
1651 int nargin = args.length(); | |
1652 | |
1653 if (nargin < 3) | |
1654 { | |
1655 print_usage (); | |
1656 return retval; | |
1657 } | |
1658 | |
1659 if (args(0).is_cell() || args(1).is_cell() || args(2).is_cell()) | |
1660 { | |
1661 Cell str; | |
1662 Cell pat; | |
1663 Cell rep; | |
6495 | 1664 dim_vector dv0; |
1665 dim_vector dv1(1,1); | |
6361 | 1666 |
1667 if (args(0).is_cell()) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1668 str = args(0).cell_value(); |
6361 | 1669 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1670 str = Cell (args(0)); |
6361 | 1671 |
1672 if (args(1).is_cell()) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1673 pat = args(1).cell_value(); |
6361 | 1674 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1675 pat = Cell (args(1)); |
6361 | 1676 |
1677 if (args(2).is_cell()) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1678 rep = args(2).cell_value(); |
6361 | 1679 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1680 rep = Cell (args(2)); |
6361 | 1681 |
6495 | 1682 dv0 = str.dims(); |
1683 if (pat.numel() != 1) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1684 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1685 dv1 = pat.dims(); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1686 if (rep.numel() != 1 && dv1 != rep.dims()) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1687 error ("regexprep: Inconsistent cell array dimensions"); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1688 } |
6361 | 1689 else if (rep.numel() != 1) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1690 dv1 = rep.dims(); |
6361 | 1691 |
1692 if (!error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1693 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1694 Cell ret (dv0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1695 octave_value_list new_args = args; |
6361 | 1696 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1697 for (octave_idx_type i = 0; i < dv0.numel(); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1698 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1699 new_args(0) = str(i); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1700 if (pat.numel() == 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1701 new_args(1) = pat(0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1702 if (rep.numel() == 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1703 new_args(2) = rep(0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1704 for (octave_idx_type j = 0; j < dv1.numel(); j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1705 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1706 if (pat.numel() != 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1707 new_args(1) = pat(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1708 if (rep.numel() != 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1709 new_args(2) = rep(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1710 new_args(0) = octregexprep (new_args, "regexprep"); |
6361 | 1711 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1712 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1713 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1714 } |
6361 | 1715 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1716 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1717 break; |
6495 | 1718 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1719 ret(i) = new_args(0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1720 } |
6361 | 1721 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1722 if (!error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1723 retval = octave_value (ret); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1724 } |
6361 | 1725 } |
1726 else | |
1727 retval = octregexprep (args, "regexprep"); | |
1728 | |
5785 | 1729 return retval; |
1730 } | |
1731 | |
1732 /* | |
1733 %!test # Replace with empty | |
1734 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; | |
1735 %! t = regexprep(xml,'<[!?][^>]*>',''); | |
1736 %! assert(t,' <tag v="hello">some stuff</tag>') | |
1737 | |
1738 %!test # Replace with non-empty | |
1739 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; | |
1740 %! t = regexprep(xml,'<[!?][^>]*>','?'); | |
1741 %! assert(t,'? <tag v="hello">some stuff?</tag>') | |
1742 | |
1743 %!test # Check that 'tokenize' is ignored | |
1744 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; | |
1745 %! t = regexprep(xml,'<[!?][^>]*>','','tokenize'); | |
1746 %! assert(t,' <tag v="hello">some stuff</tag>') | |
1747 | |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
1748 ## Test capture replacement |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
1749 %!testif HAVE_PCRE |
7242 | 1750 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins"; |
1751 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam"; | |
1752 %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1'); | |
1753 %! assert(t,result) | |
5785 | 1754 |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
1755 ## Return the original if no match |
5785 | 1756 %!assert(regexprep('hello','world','earth'),'hello') |
1757 | |
1758 ## Test a general replacement | |
1759 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g"); | |
1760 | |
1761 ## Make sure it works at the beginning and end | |
1762 %!assert(regexprep("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g"); | |
1763 %!assert(regexprep("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_"); | |
1764 | |
1765 ## Options | |
1766 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), "a_b]c{d}e-f=g"); | |
1767 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), "a_b_c_d_e_f_g"); | |
1768 | |
1769 ## Option combinations | |
1770 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), "a_b]c{d}e-f=g"); | |
1771 | |
1772 ## End conditions on replacement | |
1773 %!assert(regexprep("abc","(b)",".$1"),"a.bc"); | |
1774 %!assert(regexprep("abc","(b)","$1"),"abc"); | |
1775 %!assert(regexprep("abc","(b)","$1."),"ab.c"); | |
1776 %!assert(regexprep("abc","(b)","$1.."),"ab..c"); | |
1777 | |
6361 | 1778 ## Test cell array arguments |
6503 | 1779 %!assert(regexprep("abc",{"b","a"},"?"),{"??c"}) |
6361 | 1780 %!assert(regexprep({"abc","cba"},"b","?"),{"a?c","c?a"}) |
6503 | 1781 %!assert(regexprep({"abc","cba"},{"b","a"},{"?","!"}),{"!?c","c?!"}) |
6361 | 1782 |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
1783 # Nasty lookbehind expression |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1784 %!testif HAVE_PCRE |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1785 %! assert(regexprep('x^(-1)+y(-1)+z(-1)=0','(?<=[a-z]+)\(\-[1-9]*\)','_minus1'),'x^(-1)+y_minus1+z_minus1=0') |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
1786 |
5785 | 1787 */ |