Mercurial > jwe > octave
annotate src/DLD-FUNCTIONS/regexp.cc @ 13311:d590d9df5596
regexprep: only return cell array if first arg is cell array
* regexp.cc (Fregexprep): Only return cell array if first argument
is originally a cell array.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 10 Oct 2011 15:08:48 -0400 |
parents | 583940a28bfd |
children | c4b6ea833fa5 |
rev | line source |
---|---|
5582 | 1 /* |
2 | |
11523 | 3 Copyright (C) 2005-2011 David Bateman |
4 Copyright (C) 2002-2005 Paul Kienzle | |
7016 | 5 |
6 This file is part of Octave. | |
5582 | 7 |
8 Octave is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
7016 | 10 Free Software Foundation; either version 3 of the License, or (at your |
11 option) any later version. | |
5582 | 12 |
13 Octave is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
7016 | 19 along with Octave; see the file COPYING. If not, see |
20 <http://www.gnu.org/licenses/>. | |
5582 | 21 |
22 */ | |
23 | |
24 #ifdef HAVE_CONFIG_H | |
25 #include <config.h> | |
26 #endif | |
27 | |
5773 | 28 #include <algorithm> |
5765 | 29 #include <sstream> |
30 | |
5582 | 31 #include "defun-dld.h" |
32 #include "error.h" | |
33 #include "gripes.h" | |
34 #include "oct-obj.h" | |
35 #include "utils.h" | |
36 | |
37 #include "Cell.h" | |
38 #include "oct-map.h" | |
39 #include "str-vec.h" | |
5785 | 40 #include "quit.h" |
41 #include "parse.h" | |
8377
25bc2d31e1bf
improve OCTAVE_LOCAL_BUFFER
Jaroslav Hajek <highegg@gmail.com>
parents:
8140
diff
changeset
|
42 #include "oct-locbuf.h" |
5582 | 43 |
44 #include <pcre.h> | |
45 | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
// Define the maximum number of retries for a pattern that
// possibly results in an infinite recursion.
// NOTE(review): the code that consumes this limit is not in this part
// of the file; presumably it bounds how many times a match is retried
// after raising PCRE's match limit -- confirm at the point of use.
#define PCRE_MATCHLIMIT_MAX 10
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
49 |
5785 | 50 // The regexp is constructed as a linked list to avoid resizing the |
51 // return values in arrays at each new match. | |
52 | |
53 // FIXME don't bother collecting and composing return values the user | |
54 // doesn't want. | |
55 | |
56 class regexp_elem | |
5582 | 57 { |
5785 | 58 public: |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
59 regexp_elem (const string_vector& _named_token, const Cell& _t, |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
60 const std::string& _m, const Matrix& _te, double _s, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
61 double _e) : |
5785 | 62 named_token (_named_token), t (_t), m (_m), te (_te), s (_s), e (_e) { } |
63 | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
64 regexp_elem (const regexp_elem &a) : named_token (a.named_token), t (a.t), |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
65 m (a.m), te (a.te), s (a.s), e (a.e) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
66 { } |
5785 | 67 |
68 string_vector named_token; | |
69 Cell t; | |
70 std::string m; | |
71 Matrix te; | |
72 double s; | |
73 double e; | |
74 }; | |
75 | |
// Read-only iterator over a list of regexp_elem results.
typedef std::list<regexp_elem>::const_iterator const_iterator;

// Upper bound used by octregexp_list when it rewrites a lookbehind
// pattern containing '*' or '+' into a set of fixed-length
// alternatives (the bound is further limited by the length of the
// input string).  It is also the length reported in the warning that
// octregexp_list issues for such patterns.
#define MAXLOOKBEHIND 10

// Set to true when the lookbehind warning is issued so that the
// warning is not repeated; it is not reset in the visible code.
static bool lookbehind_warned = false;
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
80 |
5785 | 81 static int |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
82 octregexp_list (const octave_value_list &args, const std::string &nm, |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
83 bool case_insensitive, std::list<regexp_elem> &lst, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
84 string_vector &named, int &nopts, bool &once) |
5785 | 85 { |
86 int sz = 0; | |
12464
dfeea9cae79e
require PCRE to build Octave
John W. Eaton <jwe@octave.org>
parents:
12462
diff
changeset
|
87 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
88 int nargin = args.length (); |
5779 | 89 bool lineanchors = false; |
90 bool dotexceptnewline = false; | |
91 bool freespacing = false; | |
5582 | 92 |
5785 | 93 nopts = nargin - 2; |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
94 once = false; |
5785 | 95 |
5582 | 96 std::string buffer = args(0).string_value (); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
97 size_t max_length = (buffer.length () > MAXLOOKBEHIND ? |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
98 MAXLOOKBEHIND: buffer.length ()); |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
99 |
5582 | 100 if (error_state) |
101 { | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
102 gripe_wrong_type_arg (nm.c_str (), args(0)); |
5785 | 103 return 0; |
5582 | 104 } |
105 | |
106 std::string pattern = args(1).string_value (); | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
107 |
5582 | 108 if (error_state) |
109 { | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
110 gripe_wrong_type_arg (nm.c_str (), args(1)); |
5785 | 111 return 0; |
5582 | 112 } |
113 | |
114 for (int i = 2; i < nargin; i++) | |
115 { | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
116 std::string str = args(i).string_value (); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
117 |
5582 | 118 if (error_state) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
119 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
120 error ("%s: optional arguments must be strings", nm.c_str ()); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
121 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
122 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
123 |
5582 | 124 std::transform (str.begin (), str.end (), str.begin (), tolower); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
125 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
126 if (str.find ("once", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
127 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
128 once = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
129 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
130 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
131 else if (str.find ("matchcase", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
132 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
133 case_insensitive = false; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
134 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
135 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
136 else if (str.find ("ignorecase", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
137 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
138 case_insensitive = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
139 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
140 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
141 else if (str.find ("dotall", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
142 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
143 dotexceptnewline = false; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
144 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
145 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
146 else if (str.find ("stringanchors", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
147 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
148 lineanchors = false; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
149 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
150 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
151 else if (str.find ("literalspacing", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
152 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
153 freespacing = false; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
154 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
155 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
156 else if (str.find ("dotexceptnewline", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
157 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
158 dotexceptnewline = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
159 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
160 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
161 else if (str.find ("lineanchors", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
162 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
163 lineanchors = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
164 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
165 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
166 else if (str.find ("freespacing", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
167 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
168 freespacing = true; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
169 nopts--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
170 } |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
171 else if (str.find ("start", 0) && str.find ("end", 0) |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
172 && str.find ("tokenextents", 0) && str.find ("match", 0) |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
173 && str.find ("tokens", 0) && str.find ("names", 0) |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
174 && str.find ("split", 0)) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
175 error ("%s: unrecognized option", nm.c_str ()); |
5582 | 176 } |
177 | |
178 if (!error_state) | |
179 { | |
5785 | 180 Cell t; |
181 std::string m; | |
182 double s, e; | |
5582 | 183 |
184 // named tokens "(?<name>...)" are only treated with PCRE not regex. | |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
185 |
5619 | 186 size_t pos = 0; |
187 size_t new_pos; | |
188 int nnames = 0; | |
189 int inames = 0; | |
5765 | 190 std::ostringstream buf; |
5619 | 191 Array<int> named_idx; |
5582 | 192 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
193 while ((new_pos = pattern.find ("(?", pos)) != std::string::npos) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
194 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
195 if (pattern.at (new_pos + 2) == '<' |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
196 && !(pattern.at (new_pos + 3) == '=' |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
197 || pattern.at (new_pos + 3) == '!')) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
198 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
199 // The syntax of named tokens in pcre is "(?P<name>...)" while |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
200 // we need a syntax "(?<name>...)", so fix that here. Also an |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
201 // expression like |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
202 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)" |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
203 // should be perfectly legal, while pcre does not allow the same |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
204 // named token name on both sides of the alternative. Also fix |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
205 // that here by replacing name tokens by dummy names, and dealing |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
206 // with the dummy names later. |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
207 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
208 size_t tmp_pos = pattern.find_first_of ('>', new_pos); |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
209 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
210 if (tmp_pos == std::string::npos) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
211 { |
11553
01f703952eff
Improve docstrings for functions in DLD-FUNCTIONS directory.
Rik <octave@nomad.inbox5.com>
parents:
11523
diff
changeset
|
212 error ("regexp: syntax error in pattern"); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
213 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
214 } |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
215 |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
216 std::string tmp_name = |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
217 pattern.substr (new_pos+3, tmp_pos-new_pos-3); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
218 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
219 bool found = false; |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
220 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
221 for (int i = 0; i < nnames; i++) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
222 { |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
223 if (named(i) == tmp_name) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
224 { |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
225 named_idx.resize (dim_vector (inames+1, 1)); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
226 named_idx(inames) = i; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
227 found = true; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
228 break; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
229 } |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
230 } |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
231 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
232 if (! found) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
233 { |
12433
02669a1aa070
regexp.cc: avoid deprecated Array<T>:resize function
John W. Eaton <jwe@octave.org>
parents:
11590
diff
changeset
|
234 named_idx.resize (dim_vector (inames+1, 1)); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
235 named_idx(inames) = nnames; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
236 named.append (tmp_name); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
237 nnames++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
238 } |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
239 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
240 if (new_pos - pos > 0) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
241 buf << pattern.substr (pos, new_pos-pos); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
242 if (inames < 10) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
243 buf << "(?P<n00" << inames++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
244 else if (inames < 100) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
245 buf << "(?P<n0" << inames++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
246 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
247 buf << "(?P<n" << inames++; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
248 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
249 pos = tmp_pos; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
250 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
251 else if (pattern.at (new_pos + 2) == '<') |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
252 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
253 // Find lookbehind operators of arbitrary length (ie like |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
254 // "(?<=[a-z]*)") and replace with a maximum length operator |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
255 // as PCRE cannot yet handle arbitrary length lookbehind |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
256 // operators. Use the string length as the maximum length to |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
257 // avoid issues. |
5582 | 258 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
259 int brackets = 1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
260 size_t tmp_pos1 = new_pos + 2; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
261 size_t tmp_pos2 = tmp_pos1; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
262 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
263 while (tmp_pos1 <= pattern.length () && brackets > 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
264 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
265 char ch = pattern.at (tmp_pos1); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
266 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
267 if (ch == '(') |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
268 brackets++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
269 else if (ch == ')') |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
270 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
271 if (brackets > 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
272 tmp_pos2 = tmp_pos1; |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
273 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
274 brackets--; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
275 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
276 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
277 tmp_pos1++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
278 } |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
279 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
280 if (brackets != 0) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
281 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
282 buf << pattern.substr (pos, new_pos - pos) << "(?"; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
283 pos = new_pos + 2; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
284 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
285 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
286 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
287 size_t tmp_pos3 = pattern.find_first_of ("*+", tmp_pos2); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
288 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
289 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
290 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
291 if (!lookbehind_warned) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
292 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
293 lookbehind_warned = true; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
294 warning ("%s: arbitrary length lookbehind patterns are only supported up to length %d", |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
295 nm.c_str (), MAXLOOKBEHIND); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
296 } |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
297 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
298 buf << pattern.substr (pos, new_pos - pos) << "("; |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
299 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
300 size_t i; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
301 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
302 if (pattern.at (tmp_pos3) == '*') |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
303 i = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
304 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
305 i = 1; |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
306 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
307 for (; i < max_length + 1; i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
308 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
309 buf << pattern.substr (new_pos, tmp_pos3 - new_pos) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
310 << "{" << i << "}"; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
311 buf << pattern.substr (tmp_pos3 + 1, |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
312 tmp_pos1 - tmp_pos3 - 1); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
313 if (i != max_length) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
314 buf << "|"; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
315 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
316 buf << ")"; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
317 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
318 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
319 buf << pattern.substr (pos, tmp_pos1 - pos); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
320 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
321 pos = tmp_pos1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
322 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
323 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
324 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
325 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
326 buf << pattern.substr (pos, new_pos - pos) << "(?"; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
327 pos = new_pos + 2; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
328 } |
5619 | 329 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
330 } |
5619 | 331 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
332 buf << pattern.substr (pos); |
5619 | 333 |
334 if (error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
335 return 0; |
5582 | 336 |
337 // Compile expression | |
338 const char *err; | |
339 int erroffset; | |
5765 | 340 std::string buf_str = buf.str (); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
341 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
342 pcre *re = pcre_compile (buf_str.c_str (), |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
343 ((case_insensitive ? PCRE_CASELESS : 0) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
344 | (dotexceptnewline ? 0 : PCRE_DOTALL) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
345 | (lineanchors ? PCRE_MULTILINE : 0) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
346 | (freespacing ? PCRE_EXTENDED : 0)), |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
347 &err, &erroffset, 0); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
348 |
10550 | 349 if (re == 0) |
350 { | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
351 error ("%s: %s at position %d of expression", nm.c_str (), |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
352 err, erroffset); |
10550 | 353 return 0; |
354 } | |
5582 | 355 |
356 int subpatterns; | |
357 int namecount; | |
358 int nameentrysize; | |
359 char *nametable; | |
360 int idx = 0; | |
361 | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
362 pcre_fullinfo (re, 0, PCRE_INFO_CAPTURECOUNT, &subpatterns); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
363 pcre_fullinfo (re, 0, PCRE_INFO_NAMECOUNT, &namecount); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
364 pcre_fullinfo (re, 0, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
365 pcre_fullinfo (re, 0, PCRE_INFO_NAMETABLE, &nametable); |
5582 | 366 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
367 OCTAVE_LOCAL_BUFFER (int, ovector, (subpatterns+1)*3); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
368 OCTAVE_LOCAL_BUFFER (int, nidx, namecount); |
5582 | 369 |
370 for (int i = 0; i < namecount; i++) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
371 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
372 // Index of subpattern in first two bytes MSB first of name. |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
373 // Extract index. |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
374 nidx[i] = (static_cast<int> (nametable[i*nameentrysize])) << 8 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
375 | static_cast<int> (nametable[i*nameentrysize+1]); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
376 } |
5582 | 377 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
378 while (true) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
379 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
380 OCTAVE_QUIT; |
5785 | 381 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
382 int matches = pcre_exec (re, 0, buffer.c_str (), |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
383 buffer.length (), idx, |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
384 (idx ? PCRE_NOTBOL : 0), |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
385 ovector, (subpatterns+1)*3); |
5582 | 386 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
387 if (matches == PCRE_ERROR_MATCHLIMIT) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
388 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
389 // Try harder; start with default value for MATCH_LIMIT |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
390 // and increase it. |
11590
4ced6b90fffb
style fixes for warning and error messages in source files
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
391 warning ("your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow"); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
392 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
393 pcre_extra pe; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
394 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
395 pcre_config (PCRE_CONFIG_MATCH_LIMIT, |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
396 static_cast <void *> (&pe.match_limit)); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
397 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
398 pe.flags = PCRE_EXTRA_MATCH_LIMIT; |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
399 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
400 int i = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
401 while (matches == PCRE_ERROR_MATCHLIMIT |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
402 && i++ < PCRE_MATCHLIMIT_MAX) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
403 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
404 OCTAVE_QUIT; |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
405 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
406 pe.match_limit *= 10; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
407 matches = pcre_exec (re, &pe, buffer.c_str (), |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
408 buffer.length (), idx, |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
409 (idx ? PCRE_NOTBOL : 0), |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
410 ovector, (subpatterns+1)*3); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
411 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
412 } |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
413 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
414 if (matches < 0 && matches != PCRE_ERROR_NOMATCH) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
415 { |
11590
4ced6b90fffb
style fixes for warning and error messages in source files
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
416 error ("%s: internal error calling pcre_exec; error code from pcre_exec is %i", |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
417 nm.c_str (), matches); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
418 pcre_free (re); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
419 return 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
420 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
421 else if (matches == PCRE_ERROR_NOMATCH) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
422 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
423 else if (ovector[1] <= ovector[0]) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
424 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
425 // FIXME: Zero sized match!! Is this the right thing to do? |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
426 idx = ovector[0] + 1; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
427 continue; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
428 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
429 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
430 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
431 int pos_match = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
432 Matrix te (matches-1, 2); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
433 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
434 for (int i = 1; i < matches; i++) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
435 { |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
436 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
437 && (i == 1 || ovector[2*i] != ovector[2*i-2] |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
438 || ovector[2*i-1] != ovector[2*i+1]) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
439 && ovector[2*i] >= 0 && ovector[2*i+1] > 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
440 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
441 te(pos_match,0) = double (ovector[2*i]+1); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
442 te(pos_match++,1) = double (ovector[2*i+1]); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
443 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
444 } |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
445 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
446 te.resize (pos_match, 2); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
447 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
448 s = double (ovector[0]+1); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
449 e = double (ovector[1]); |
5582 | 450 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
451 const char **listptr; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
452 int status = pcre_get_substring_list (buffer.c_str (), ovector, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
453 matches, &listptr); |
5582 | 454 |
10550 | 455 if (status == PCRE_ERROR_NOMEMORY) |
456 { | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
457 error ("%s: cannot allocate memory in pcre_get_substring_list", |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
458 nm.c_str ()); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
459 pcre_free (re); |
10550 | 460 return 0; |
461 } | |
5582 | 462 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
463 Cell cell_t (dim_vector (1, pos_match)); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
464 string_vector named_tokens (nnames); |
10518
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
465 int pos_offset = 0; |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
466 pos_match = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
467 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
468 for (int i = 1; i < matches; i++) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
469 { |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
470 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
471 { |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
472 if (i == 1 || ovector[2*i] != ovector[2*i-2] |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
473 || ovector[2*i-1] != ovector[2*i+1]) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
474 { |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
475 if (namecount > 0) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
476 named_tokens(named_idx(i-pos_offset-1)) = |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
477 std::string (*(listptr+nidx[i-pos_offset-1])); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
478 cell_t(pos_match++) = |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
479 std::string (*(listptr+i)); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
480 } |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
481 else |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
482 pos_offset++; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
483 } |
10546
f5b8b28917a2
Eliminate compile warning about explicit braces
Rik <code@nomad.inbox5.com>
parents:
10518
diff
changeset
|
484 } |
5582 | 485 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
486 m = std::string (*listptr); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
487 t = cell_t; |
5785 | 488 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
489 pcre_free_substring_list (listptr); |
5582 | 490 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
491 regexp_elem new_elem (named_tokens, t, m, te, s, e); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
492 lst.push_back (new_elem); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
493 idx = ovector[1]; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
494 sz++; |
5785 | 495 |
11002
2538d03489cc
avoid infinite loop in regexp searches with PCRE
John W. Eaton <jwe@octave.org>
parents:
10846
diff
changeset
|
496 if (once || idx >= buffer.length ()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
497 break; |
5582 | 498 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
499 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
500 } |
5582 | 501 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
502 pcre_free (re); |
12464
dfeea9cae79e
require PCRE to build Octave
John W. Eaton <jwe@octave.org>
parents:
12462
diff
changeset
|
503 } |
8619
930a8114197b
For zero length matches in regexp, advance index by one and try again
David Bateman <dbateman@free.fr>
parents:
8477
diff
changeset
|
504 |
5785 | 505 return sz; |
506 } | |
5582 | 507 |
5785 | 508 static octave_value_list |
509 octregexp (const octave_value_list &args, int nargout, const std::string &nm, | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
510 bool case_insensitive) |
5785 | 511 { |
512 octave_value_list retval; | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
513 int nargin = args.length (); |
5785 | 514 std::list<regexp_elem> lst; |
515 string_vector named; | |
516 int nopts; | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
517 bool once; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
518 |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
519 int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts, once); |
5785 | 520 |
521 if (! error_state) | |
522 { | |
523 // Convert the linked list into the correct form for the return values | |
524 | |
525 octave_idx_type i = 0; | |
11045
cc3aad9dd3ef
dispatch.cc, fltk_backend.cc, regexp.cc: use octave_scalar_map instead of Octave_map
John W. Eaton <jwe@octave.org>
parents:
11032
diff
changeset
|
526 octave_scalar_map nmap; |
12464
dfeea9cae79e
require PCRE to build Octave
John W. Eaton <jwe@octave.org>
parents:
12462
diff
changeset
|
527 |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
528 retval.resize (7); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
529 |
5785 | 530 if (sz == 1) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
531 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
532 for (int j = 0; j < named.length (); j++) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
533 nmap.assign (named(j), lst.begin()->named_token (j)); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
534 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
535 retval(5) = nmap; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
536 } |
5785 | 537 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
538 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
539 for (int j = 0; j < named.length (); j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
540 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
541 Cell tmp (dim_vector (1, sz)); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
542 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
543 i = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
544 for (const_iterator p = lst.begin (); p != lst.end (); p++) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
545 tmp(i++) = p->named_token (j); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
546 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
547 nmap.assign (named(j), octave_value (tmp)); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
548 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
549 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
550 retval(5) = nmap; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
551 } |
5785 | 552 |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
553 std::string buffer = args(0).string_value (); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
554 |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
555 if (once) |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
556 { |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
557 retval(4) = sz ? lst.front ().t : Cell (); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
558 retval(3) = sz ? lst.front ().m : std::string (); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
559 retval(2) = sz ? lst.front ().te : Matrix (); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
560 |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
561 if (sz) |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
562 { |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
563 double e = lst.front ().e; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
564 double s = lst.front ().s; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
565 |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
566 Cell sp (dim_vector (1, 2)); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
567 sp(0) = buffer.substr (0, s-1); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
568 sp(1) = buffer.substr (e); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
569 |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
570 retval(6) = sp; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
571 retval(1) = e; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
572 retval(0) = s; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
573 } |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
574 else |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
575 { |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
576 retval(6) = buffer; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
577 retval(1) = Matrix (); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
578 retval(0) = Matrix (); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
579 } |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
580 } |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
581 else |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
582 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
583 Cell t (dim_vector (1, sz)); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
584 Cell m (dim_vector (1, sz)); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
585 Cell te (dim_vector (1, sz)); |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
586 NDArray e (dim_vector (1, sz)); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
587 NDArray s (dim_vector (1, sz)); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
588 Cell sp (dim_vector (1, sz+1)); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
589 size_t sp_start = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
590 |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
591 i = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
592 for (const_iterator p = lst.begin (); p != lst.end (); p++) |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
593 { |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
594 t(i) = p->t; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
595 m(i) = p->m; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
596 te(i) = p->te; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
597 e(i) = p->e; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
598 s(i) = p->s; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
599 sp(i) = buffer.substr (sp_start, p->s-sp_start-1); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
600 sp_start = p->e; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
601 i++; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
602 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
603 |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
604 sp(i) = buffer.substr (sp_start); |
5785 | 605 |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
606 retval(6) = sp; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
607 retval(4) = t; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
608 retval(3) = m; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
609 retval(2) = te; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
610 retval(1) = e; |
10551 | 611 retval(0) = s; |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
612 } |
5582 | 613 |
614 // Alter the order of the output arguments | |
615 if (nopts > 0) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
616 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
617 int n = 0; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
618 octave_value_list new_retval; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
619 new_retval.resize (nargout); |
5582 | 620 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
621 OCTAVE_LOCAL_BUFFER (int, arg_used, 6); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
622 for (int j = 0; j < 6; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
623 arg_used[j] = false; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
624 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
625 for (int j = 2; j < nargin; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
626 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
627 int k = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
628 std::string str = args(j).string_value (); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
629 std::transform (str.begin (), str.end (), str.begin (), tolower); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
630 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
631 if (str.find ("once", 0) == 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
632 || str.find ("stringanchors", 0) == 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
633 || str.find ("lineanchors", 0) == 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
634 || str.find ("matchcase", 0) == 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
635 || str.find ("ignorecase", 0) == 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
636 || str.find ("dotall", 0) == 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
637 || str.find ("dotexceptnewline", 0) == 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
638 || str.find ("literalspacing", 0) == 0 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
639 || str.find ("freespacing", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
640 continue; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
641 else if (str.find ("start", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
642 k = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
643 else if (str.find ("end", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
644 k = 1; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
645 else if (str.find ("tokenextents", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
646 k = 2; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
647 else if (str.find ("match", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
648 k = 3; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
649 else if (str.find ("tokens", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
650 k = 4; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
651 else if (str.find ("names", 0) == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
652 k = 5; |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
653 else if (str.find ("split", 0) == 0) |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
654 k = 6; |
5582 | 655 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
656 new_retval(n++) = retval(k); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
657 arg_used[k] = true; |
5582 | 658 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
659 if (n == nargout) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
660 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
661 } |
5582 | 662 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
663 // Fill in the rest of the arguments |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
664 if (n < nargout) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
665 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
666 for (int j = 0; j < 6; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
667 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
668 if (! arg_used[j]) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
669 new_retval(n++) = retval(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
670 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
671 } |
5582 | 672 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
673 retval = new_retval; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
674 } |
5582 | 675 } |
676 | |
677 return retval; | |
678 } | |
679 | |
6361 | 680 static octave_value_list |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
681 octcellregexp (const octave_value_list &args, int nargout, |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
682 const std::string &nm, bool case_insensitive) |
6361 | 683 { |
684 octave_value_list retval; | |
685 | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
686 if (args(0).is_cell ()) |
6361 | 687 { |
688 OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout); | |
689 octave_value_list new_args = args; | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
690 Cell cellstr = args(0).cell_value (); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
691 if (args(1).is_cell ()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
692 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
693 Cell cellpat = args(1).cell_value (); |
6361 | 694 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
695 if (cellpat.numel () == 1) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
696 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
697 for (int j = 0; j < nargout; j++) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
698 newretval[j].resize (cellstr.dims ()); |
6361 | 699 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
700 new_args(1) = cellpat(0); |
6361 | 701 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
702 for (octave_idx_type i = 0; i < cellstr.numel (); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
703 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
704 new_args(0) = cellstr(i); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
705 octave_value_list tmp = octregexp (new_args, nargout, nm, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
706 case_insensitive); |
6361 | 707 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
708 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
709 break; |
6361 | 710 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
711 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
712 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
713 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
714 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
715 else if (cellstr.numel () == 1) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
716 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
717 for (int j = 0; j < nargout; j++) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
718 newretval[j].resize (cellpat.dims ()); |
6361 | 719 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
720 new_args(0) = cellstr(0); |
6361 | 721 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
722 for (octave_idx_type i = 0; i < cellpat.numel (); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
723 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
724 new_args(1) = cellpat(i); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
725 octave_value_list tmp = octregexp (new_args, nargout, nm, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
726 case_insensitive); |
6361 | 727 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
728 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
729 break; |
6361 | 730 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
731 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
732 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
733 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
734 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
735 else if (cellstr.numel () == cellpat.numel ()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
736 { |
6361 | 737 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
738 if (cellstr.dims () != cellpat.dims ()) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
739 error ("%s: Inconsistent cell array dimensions", nm.c_str ()); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
740 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
741 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
742 for (int j = 0; j < nargout; j++) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
743 newretval[j].resize (cellstr.dims ()); |
6361 | 744 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
745 for (octave_idx_type i = 0; i < cellstr.numel (); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
746 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
747 new_args(0) = cellstr(i); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
748 new_args(1) = cellpat(i); |
6361 | 749 |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
750 octave_value_list tmp = octregexp (new_args, nargout, nm, |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
751 case_insensitive); |
6361 | 752 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
753 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
754 break; |
6361 | 755 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
756 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
757 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
758 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
759 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
760 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
761 else |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
762 error ("regexp: cell array arguments must be scalar or equal size"); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
763 } |
6361 | 764 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
765 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
766 for (int j = 0; j < nargout; j++) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
767 newretval[j].resize (cellstr.dims ()); |
6361 | 768 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
769 for (octave_idx_type i = 0; i < cellstr.numel (); i++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
770 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
771 new_args(0) = cellstr(i); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
772 octave_value_list tmp = octregexp (new_args, nargout, nm, |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
773 case_insensitive); |
6361 | 774 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
775 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
776 break; |
6361 | 777 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
778 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
779 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
780 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
781 } |
6361 | 782 |
783 if (!error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
784 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
785 retval(j) = octave_value (newretval[j]); |
6361 | 786 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
787 else if (args(1).is_cell ()) |
6361 | 788 { |
789 OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout); | |
790 octave_value_list new_args = args; | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
791 Cell cellpat = args(1).cell_value (); |
6361 | 792 |
793 for (int j = 0; j < nargout; j++) | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
794 newretval[j].resize(cellpat.dims ()); |
6361 | 795 |
796 for (octave_idx_type i = 0; i < cellpat.numel (); i++) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
797 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
798 new_args(1) = cellpat(i); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
799 octave_value_list tmp = octregexp (new_args, nargout, nm, |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
800 case_insensitive); |
6361 | 801 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
802 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
803 break; |
6361 | 804 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
805 for (int j = 0; j < nargout; j++) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
806 newretval[j](i) = tmp(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
807 } |
6361 | 808 |
809 if (!error_state) | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
810 { |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
811 for (int j = 0; j < nargout; j++) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
812 retval(j) = octave_value (newretval[j]); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
813 } |
6361 | 814 } |
815 else | |
816 retval = octregexp (args, nargout, nm, case_insensitive); | |
817 | |
818 return retval; | |
819 | |
820 } | |
821 | |
5582 | 822 DEFUN_DLD (regexp, args, nargout, |
823 "-*- texinfo -*-\n\ | |
10840 | 824 @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexp (@var{str}, @var{pat})\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
825 @deftypefnx {Loadable Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
826 Regular expression string matching. Search for @var{pat} in @var{str} and\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
827 return the positions and substrings of any matches, or empty values if there\n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
828 are none.\n\ |
5582 | 829 \n\ |
830 The matched pattern @var{pat} can include any of the standard regex\n\ | |
831 operators, including:\n\ | |
832 \n\ | |
833 @table @code\n\ | |
834 @item .\n\ | |
835 Match any character\n\ | |
10840 | 836 \n\ |
5582 | 837 @item * + ? @{@}\n\ |
838 Repetition operators, representing\n\ | |
839 @table @code\n\ | |
840 @item *\n\ | |
841 Match zero or more times\n\ | |
10840 | 842 \n\ |
5582 | 843 @item +\n\ |
844 Match one or more times\n\ | |
10840 | 845 \n\ |
5582 | 846 @item ?\n\ |
847 Match zero or one times\n\ | |
10840 | 848 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
849 @item @{@var{n}@}\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
850 Match exactly @var{n} times\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
851 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
852 @item @{@var{n},@}\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
853 Match @var{n} or more times\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
854 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
855 @item @{@var{m},@var{n}@}\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
856 Match between @var{m} and @var{n} times\n\ |
5582 | 857 @end table\n\ |
10840 | 858 \n\ |
5582 | 859 @item [@dots{}] [^@dots{}]\n\ |
10840 | 860 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
861 List operators. The pattern will match any character listed between \"[\"\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
862 and \"]\". If the first character is \"^\" then the pattern is inverted and\n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
863 any character except those listed between brackets will match.\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
864 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
865 Escape sequences defined below can also be used inside list\n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
866 operators. For example, a template for a floating point number might be\n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
867 @code{[-+.\\d]+}.\n\ |
10840 | 868 \n\ |
5582 | 869 @item ()\n\ |
870 Grouping operator\n\ | |
10840 | 871 \n\ |
5582 | 872 @item |\n\ |
9036
58604c45ca74
Cleanup of data types related documentation
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
873 Alternation operator. Match one of a choice of regular expressions. The\n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
874 alternatives must be delimited by the grouping operator @code{()} above.\n\ |
10840 | 875 \n\ |
5582 | 876 @item ^ $\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
877 Anchoring operators. Requires pattern to occur at the start (@code{^}) or\n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
878 end (@code{$}) of the string.\n\ |
5582 | 879 @end table\n\ |
880 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
881 In addition, the following escaped characters have special meaning. Note,\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
882 it is recommended to quote @var{pat} in single quotes, rather than double\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
883 quotes, to avoid the escape sequences being interpreted by Octave before\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
884 being passed to @code{regexp}.\n\ |
5582 | 885 \n\ |
886 @table @code\n\ | |
887 @item \\b\n\ | |
888 Match a word boundary\n\ | |
10840 | 889 \n\ |
5582 | 890 @item \\B\n\ |
891 Match within a word\n\ | |
10840 | 892 \n\ |
5582 | 893 @item \\w\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
894 Match any word character\n\ |
10840 | 895 \n\ |
5582 | 896 @item \\W\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
897 Match any non-word character\n\ |
10840 | 898 \n\ |
5582 | 899 @item \\<\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
900 Match the beginning of a word\n\ |
10840 | 901 \n\ |
5582 | 902 @item \\>\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
903 Match the end of a word\n\ |
10840 | 904 \n\ |
5582 | 905 @item \\s\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
906 Match any whitespace character\n\ |
10840 | 907 \n\ |
5582 | 908 @item \\S\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
909 Match any non-whitespace character\n\ |
10840 | 910 \n\ |
5582 | 911 @item \\d\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
912 Match any digit\n\ |
10840 | 913 \n\ |
5582 | 914 @item \\D\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
915 Match any non-digit\n\ |
5582 | 916 @end table\n\ |
917 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
918 The outputs of @code{regexp} default to the order given below\n\ |
5582 | 919 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
920 @table @var\n\ |
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
921 @item s\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
922 The start indices of each matching substring\n\ |
5582 | 923 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
924 @item e\n\ |
5582 | 925 The end indices of each matching substring\n\ |
926 \n\ | |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
927 @item te\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
928 The extents of each matched token surrounded by @code{(@dots{})} in\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
929 @var{pat}\n\ |
5582 | 930 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
931 @item m\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
932 A cell array of the text of each match\n\ |
5582 | 933 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
934 @item t\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
935 A cell array of the text of each token matched\n\ |
5582 | 936 \n\ |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
937 @item nm\n\ |
5582 | 938 A structure containing the text of each matched named token, with the name\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
939 being used as the fieldname. A named token is denoted by\n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
940 @code{(?<name>@dots{})}.\n\ |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
941 @item sp\n\ |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
942 A cell array of the text not returned by match, i.e., the pieces of the string before, between, and after the matches.\n\ |
5582 | 943 @end table\n\ |
944 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
945 Particular output arguments, or the order of the output arguments, can be\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
946 selected by additional @var{opt} arguments. These are strings and the\n\ |
5582 | 947 correspondence between the output arguments and the optional arguments\n\ |
948 is\n\ | |
949 \n\ | |
950 @multitable @columnfractions 0.2 0.3 0.3 0.2\n\ | |
951 @item @tab 'start' @tab @var{s} @tab\n\ | |
952 @item @tab 'end' @tab @var{e} @tab\n\ | |
953 @item @tab 'tokenExtents' @tab @var{te} @tab\n\ | |
954 @item @tab 'match' @tab @var{m} @tab\n\ | |
955 @item @tab 'tokens' @tab @var{t} @tab\n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
956 @item @tab 'names' @tab @var{nm} @tab\n\ |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
957 @item @tab 'split' @tab @var{sp} @tab\n\ |
5582 | 958 @end multitable\n\ |
959 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
960 Additional arguments are summarized below.\n\ |
5779 | 961 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
962 @table @samp\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
963 @item once\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
964 Return only the first occurrence of the pattern.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
965 \n\ |
5779 | 966 @item matchcase\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
967 Make the matching case sensitive. (default)\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
968 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
969 Alternatively, use (?-i) in the pattern.\n\ |
10840 | 970 \n\ |
5779 | 971 @item ignorecase\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
972 Ignore case when matching the pattern to the string.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
973 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
974 Alternatively, use (?i) in the pattern.\n\ |
10840 | 975 \n\ |
5779 | 976 @item stringanchors\n\ |
12642
f96b9b9f141b
doc: Periodic grammarcheck and spellcheck of documentation.
Rik <octave@nomad.inbox5.com>
parents:
12464
diff
changeset
|
977 Match the anchor characters at the beginning and end of the string.\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
978 (default)\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
979 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
980 Alternatively, use (?-m) in the pattern.\n\ |
10840 | 981 \n\ |
5779 | 982 @item lineanchors\n\ |
983 Match the anchor characters at the beginning and end of the line.\n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
984 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
985 Alternatively, use (?m) in the pattern.\n\ |
10840 | 986 \n\ |
5779 | 987 @item dotall\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
988 The pattern @code{.} matches all characters including the newline character.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
989 (default)\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
990 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
991 Alternatively, use (?s) in the pattern.\n\ |
10840 | 992 \n\ |
5779 | 993 @item dotexceptnewline\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
994 The pattern @code{.} matches all characters except the newline character.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
995 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
996 Alternatively, use (?-s) in the pattern.\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
997 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
998 @item literalspacing\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
999 All characters in the pattern, including whitespace, are significant and are\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1000 used in pattern matching. (default)\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1001 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1002 Alternatively, use (?-x) in the pattern.\n\ |
10840 | 1003 \n\ |
5779 | 1004 @item freespacing\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1005 The pattern may include arbitrary whitespace and also comments beginning with\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1006 the character @samp{#}.\n\ |
10840 | 1007 \n\ |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1008 Alternatively, use (?x) in the pattern.\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1009 \n\ |
5779 | 1010 @end table\n\ |
11572
7d6d8c1e471f
Grammarcheck Texinfo for files in src directory.
Rik <octave@nomad.inbox5.com>
parents:
11553
diff
changeset
|
1011 @seealso{regexpi, strfind, regexprep}\n\ |
5582 | 1012 @end deftypefn") |
1013 { | |
6361 | 1014 octave_value_list retval; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1015 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1016 int nargin = args.length (); |
6361 | 1017 |
1018 if (nargin < 2) | |
1019 print_usage (); | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1020 else if (args(0).is_cell () || args(1).is_cell ()) |
6361 | 1021 retval = octcellregexp (args, nargout, "regexp", false); |
1022 else | |
1023 retval = octregexp (args, nargout, "regexp", false); | |
1024 | |
1025 return retval; | |
5582 | 1026 } |
1027 | |
1028 /* | |
1029 | |
8140
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1030 ## PCRE_ERROR_MATCHLIMIT test |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1031 %!test |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1032 %! s=sprintf('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-'); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1033 %! ws = warning("query"); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1034 %! unwind_protect |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1035 %! warning("off"); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1036 %! regexp(s, '(\s*-*\d+[.]*\d*\s*)+\n'); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1037 %! unwind_protect_cleanup |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1038 %! warning(ws); |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1039 %! end_unwind_protect |
cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
Thomas Weber <thomas.weber.mail@gmail.com>
parents:
8093
diff
changeset
|
1040 |
5582 | 1041 ## seg-fault test |
1042 %!assert(regexp("abcde","."),[1,2,3,4,5]) | |
1043 | |
1044 ## Check that anchoring of pattern works correctly | |
1045 %!assert(regexp('abcabc','^abc'),1); | |
1046 %!assert(regexp('abcabc','abc$'),4); | |
5785 | 1047 %!assert(regexp('abcabc','^abc$'),zeros(1,0)); |
5582 | 1048 |
1049 %!test | |
1050 %! [s, e, te, m, t] = regexp(' No Match ', 'f(.*)uck'); | |
5785 | 1051 %! assert (s,zeros(1,0)) |
1052 %! assert (e,zeros(1,0)) | |
1053 %! assert (te,cell(1,0)) | |
1054 %! assert (m, cell(1,0)) | |
1055 %! assert (t, cell(1,0)) | |
5582 | 1056 |
1057 %!test | |
1058 %! [s, e, te, m, t] = regexp(' FiRetrUck ', 'f(.*)uck'); | |
5785 | 1059 %! assert (s,zeros(1,0)) |
1060 %! assert (e,zeros(1,0)) | |
1061 %! assert (te,cell(1,0)) | |
1062 %! assert (m, cell(1,0)) | |
1063 %! assert (t, cell(1,0)) | |
5582 | 1064 |
1065 %!test | |
1066 %! [s, e, te, m, t] = regexp(' firetruck ', 'f(.*)uck'); | |
1067 %! assert (s,2) | |
1068 %! assert (e,10) | |
1069 %! assert (te{1},[3,7]) | |
1070 %! assert (m{1}, 'firetruck') | |
1071 %! assert (t{1}{1}, 'iretr') | |
1072 | |
1073 %!test | |
1074 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*'); | |
1075 %! assert (s,[1,12]) | |
1076 %! assert (e,[5,17]) | |
1077 %! assert (size(te), [1,2]) | |
1078 %! assert (isempty(te{1})) | |
1079 %! assert (isempty(te{2})) | |
1080 %! assert (m{1},'short') | |
1081 %! assert (m{2},'string') | |
1082 %! assert (size(t), [1,2]) | |
1083 %! assert (isempty(t{1})) | |
1084 %! assert (isempty(t{2})) | |
1085 | |
1086 %!test | |
1087 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once'); | |
1088 %! assert (s,1) | |
1089 %! assert (e,5) | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1090 %! assert (isempty(te)) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1091 %! assert (m,'short') |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1092 %! assert (isempty(t)) |
5582 | 1093 |
1094 %!test | |
1095 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | |
1096 %! assert (s,1) | |
1097 %! assert (e,5) | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1098 %! assert (isempty(te)) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1099 %! assert (m,'short') |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1100 %! assert (isempty(t)) |
5582 | 1101 |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1102 %!test |
7242 | 1103 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); |
1104 %! assert (s,1) | |
1105 %! assert (e,10) | |
1106 %! assert (size(te), [1,1]) | |
1107 %! assert (te{1}, [1 5; 7, 10]) | |
1108 %! assert (m{1},'short test') | |
1109 %! assert (size(t),[1,1]) | |
1110 %! assert (t{1}{1},'short') | |
1111 %! assert (t{1}{2},'test') | |
1112 %! assert (size(nm), [1,1]) | |
1113 %! assert (!isempty(fieldnames(nm))) | |
1114 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | |
1115 %! assert (nm.word1,'short') | |
1116 %! assert (nm.word2,'test') | |
5582 | 1117 |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1118 %!test |
7242 | 1119 %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); |
1120 %! assert (s,1) | |
1121 %! assert (e,10) | |
1122 %! assert (size(te), [1,1]) | |
1123 %! assert (te{1}, [1 5; 7, 10]) | |
1124 %! assert (m{1},'short test') | |
1125 %! assert (size(t),[1,1]) | |
1126 %! assert (t{1}{1},'short') | |
1127 %! assert (t{1}{2},'test') | |
1128 %! assert (size(nm), [1,1]) | |
1129 %! assert (!isempty(fieldnames(nm))) | |
1130 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | |
1131 %! assert (nm.word1,'short') | |
1132 %! assert (nm.word2,'test') | |
5619 | 1133 |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1134 %!test |
7242 | 1135 %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names'); |
1136 %! assert (size(t), [1,2]); | |
1137 %! assert (t{1}{1},'John'); | |
1138 %! assert (t{1}{2},'Davis'); | |
1139 %! assert (t{2}{1},'Rogers'); | |
1140 %! assert (t{2}{2},'James'); | |
1141 %! assert (size(nm), [1,1]); | |
1142 %! assert (nm.first{1},'John'); | |
1143 %! assert (nm.first{2},'James'); | |
1144 %! assert (nm.last{1},'Davis'); | |
1145 %! assert (nm.last{2},'Rogers'); | |
5582 | 1146 |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1147 %!test |
10518
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
1148 %! # Parentheses inside a named token, e.g. (int), cause a problem
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
1149 %! assert (regexp('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int')); |
fcafe0e9bd58
Handle repeated matches in matches returned by pcre
David Bateman <dbateman@free.fr>
parents:
10504
diff
changeset
|
1150 |
5779 | 1151 %!assert(regexp("abc\nabc",'.'),[1:7]) |
1152 %!assert(regexp("abc\nabc",'.','dotall'),[1:7]) | |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1153 %!test |
7242 | 1154 %! assert(regexp("abc\nabc",'(?s).'),[1:7]) |
1155 %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) | |
1156 %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) | |
5779 | 1157 |
1158 %!assert(regexp("caseCaSe",'case'),1) | |
1159 %!assert(regexp("caseCaSe",'case',"matchcase"),1) | |
1160 %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5]) | |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1161 %!test |
7242 | 1162 %! assert(regexp("caseCaSe",'(?-i)case'),1) |
1163 %! assert(regexp("caseCaSe",'(?i)case'),[1,5]) | |
5779 | 1164 |
1165 %!assert (regexp("abc\nabc",'c$'),7) | |
1166 %!assert (regexp("abc\nabc",'c$',"stringanchors"),7) | |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1167 %!test |
7242 | 1168 %! assert (regexp("abc\nabc",'(?-m)c$'),7) |
1169 %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7]) | |
1170 %! assert (regexp("abc\nabc",'(?m)c$'),[3,7]) | |
5779 | 1171 |
1172 %!assert (regexp("this word",'s w'),4) | |
1173 %!assert (regexp("this word",'s w','literalspacing'),4) | |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1174 %!test |
7242 | 1175 %! assert (regexp("this word",'(?-x)s w','literalspacing'),4) |
1176 %! assert (regexp("this word",'s w','freespacing'),zeros(1,0)) | |
1177 %! assert (regexp("this word",'(?x)s w'),zeros(1,0)) | |
5779 | 1178 |
5582 | 1179 %!error regexp('string', 'tri', 'BadArg'); |
1180 %!error regexp('string'); | |
1181 | |
6361 | 1182 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) |
1183 %!assert(regexp({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)}) | |
1184 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]}) | |
1185 %!assert(regexp('Strings',{'t','s'}),{2,7}) | |
1186 | |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
1187 ## Test case for lookaround operators |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1188 %!test |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1189 %! assert(regexp('Iraq','q(?!u)'),4) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1190 %! assert(regexp('quit','q(?!u)'), zeros(1,0)) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1191 %! assert(regexp('quit','q(?=u)','match'), {'q'}) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1192 %! assert(regexp("quit",'q(?=u+)','match'), {'q'}) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1193 %! assert(regexp("qit",'q(?=u+)','match'), cell(1,0)) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1194 %! assert(regexp("qit",'q(?=u*)','match'), {'q'}) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1195 %! assert(regexp('thingamabob','(?<=a)b'), 9) |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
1196 |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1197 ## Tests for split option. |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1198 %!shared str |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1199 %! str = "foo bar foo"; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1200 %!test |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1201 %! [a, b] = regexp (str, "f..", "match", "split"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1202 %! assert (a, {"foo", "foo"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1203 %! assert (b, {"", " bar ", ""}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1204 %!test |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1205 %! [a, b] = regexp (str, "f..", "match", "split", "once"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1206 %! assert (a, "foo"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1207 %! assert (b, {"", " bar foo"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1208 %!test |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1209 %! [a, b] = regexp (str, "fx.", "match", "split"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1210 %! assert (a, cell (1, 0)); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1211 %! assert (b, {"foo bar foo"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1212 %!test |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1213 %! [a, b] = regexp (str, "fx.", "match", "split", "once"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1214 %! assert (a, ""); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1215 %! assert (b, "foo bar foo") |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1216 |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1217 %!shared str |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1218 %! str = "foo bar"; |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1219 %!test |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1220 %! [a, b] = regexp (str, "f..", "match", "split"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1221 %! assert (a, {"foo"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1222 %! assert (b, {"", " bar"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1223 %!test |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1224 %! [a, b] = regexp (str, "b..", "match", "split"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1225 %! assert (a, {"bar"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1226 %! assert (b, {"foo ", ""}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1227 %!test |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1228 %! [a, b] = regexp (str, "x", "match", "split"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1229 %! assert (a, cell (1, 0)); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1230 %! assert (b, {"foo bar"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1231 %!test |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1232 %! [a, b] = regexp (str, "[o]+", "match", "split"); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1233 %! assert (a, {"oo"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1234 %! assert (b, {"f", " bar"}); |
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1235 |
5582 | 1236 */ |
1237 | |
// regexpi entry point, defined with the DEFUN_DLD macro; the string literal
// that follows is its Texinfo help text.  The body has the same shape as the
// regexp entry point earlier in this file, but hands the name regexpi and the
// flag value true (instead of regexp and false) to the two helpers.
// NOTE(review): the trailing true presumably selects case-insensitive
// matching -- confirm against octregexp / octcellregexp.
6549 | 1238 DEFUN_DLD (regexpi, args, nargout,
5582 | 1239 "-*- texinfo -*-\n\ |
10840 | 1240 @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexpi (@var{str}, @var{pat})\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1241 @deftypefnx {Loadable Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\ |
5582 | 1242 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1243 Case insensitive regular expression string matching. Search for @var{pat} in\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1244 @var{str} and return the positions and substrings of any matches, or empty\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1245 values if there are none. @xref{doc-regexp,,regexp}, for details on the\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1246 syntax of the search pattern.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1247 @seealso{regexp}\n\ |
5582 | 1248 @end deftypefn") |
1249 { | |
// Result list handed back to the caller; it is only assigned on the two
// helper paths below, so the usage-message path returns it default-constructed.
6361 | 1250 octave_value_list retval; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1251 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1252 int nargin = args.length (); |
6361 | 1253 |
// Fewer than two arguments (the help text lists str and pat): print usage.
1254 if (nargin < 2) | |
1255 print_usage (); | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
// A cell array in either of the first two arguments selects octcellregexp;
// otherwise the call goes to octregexp with the same remaining arguments.
1256 else if (args(0).is_cell () || args(1).is_cell ()) |
6361 | 1257 retval = octcellregexp (args, nargout, "regexpi", true); |
1258 else | |
1259 retval = octregexp (args, nargout, "regexpi", true); | |
1260 | |
1261 return retval; | |
5582 | 1262 } |
1263 | |
1264 /* | |
1265 | |
1266 ## seg-fault test | |
1267 %!assert(regexpi("abcde","."),[1,2,3,4,5]) | |
1268 | |
1269 ## Check that anchoring of pattern works correctly | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1270 %!assert(regexpi('abcabc','^ABC'),1); |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1271 %!assert(regexpi('abcabc','ABC$'),4); |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1272 %!assert(regexpi('abcabc','^ABC$'),zeros(1,0)); |
5582 | 1273 |
1274 %!test | |
1275 %! [s, e, te, m, t] = regexpi(' No Match ', 'f(.*)uck'); | |
5785 | 1276 %! assert (s,zeros(1,0)) |
1277 %! assert (e,zeros(1,0)) | |
1278 %! assert (te,cell(1,0)) | |
1279 %! assert (m, cell(1,0)) | |
1280 %! assert (t, cell(1,0)) | |
5582 | 1281 |
1282 %!test | |
1283 %! [s, e, te, m, t] = regexpi(' FiRetrUck ', 'f(.*)uck'); | |
1284 %! assert (s,2) | |
1285 %! assert (e,10) | |
1286 %! assert (te{1},[3,7]) | |
1287 %! assert (m{1}, 'FiRetrUck') | |
1288 %! assert (t{1}{1}, 'iRetr') | |
1289 | |
1290 %!test | |
1291 %! [s, e, te, m, t] = regexpi(' firetruck ', 'f(.*)uck'); | |
1292 %! assert (s,2) | |
1293 %! assert (e,10) | |
1294 %! assert (te{1},[3,7]) | |
1295 %! assert (m{1}, 'firetruck') | |
1296 %! assert (t{1}{1}, 'iretr') | |
1297 | |
1298 %!test | |
1299 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*'); | |
1300 %! assert (s,[1,12]) | |
1301 %! assert (e,[5,17]) | |
1302 %! assert (size(te), [1,2]) | |
1303 %! assert (isempty(te{1})) | |
1304 %! assert (isempty(te{2})) | |
1305 %! assert (m{1},'ShoRt') | |
1306 %! assert (m{2},'String') | |
1307 %! assert (size(t), [1,2]) | |
1308 %! assert (isempty(t{1})) | |
1309 %! assert (isempty(t{2})) | |
1310 | |
1311 %!test | |
1312 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once'); | |
1313 %! assert (s,1) | |
1314 %! assert (e,5) | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1315 %! assert (isempty(te)) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1316 %! assert (m,'ShoRt') |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1317 %! assert (isempty(t)) |
5582 | 1318 |
1319 %!test | |
1320 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | |
1321 %! assert (s,1) | |
1322 %! assert (e,5) | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1323 %! assert (isempty(te)) |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1324 %! assert (m,'ShoRt') |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1325 %! assert (isempty(t)) |
5582 | 1326 |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1327 %!test |
7242 | 1328 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); |
1329 %! assert (s,1) | |
1330 %! assert (e,10) | |
1331 %! assert (size(te), [1,1]) | |
1332 %! assert (te{1}, [1 5; 7, 10]) | |
1333 %! assert (m{1},'ShoRt Test') | |
1334 %! assert (size(t),[1,1]) | |
1335 %! assert (t{1}{1},'ShoRt') | |
1336 %! assert (t{1}{2},'Test') | |
1337 %! assert (size(nm), [1,1]) | |
1338 %! assert (!isempty(fieldnames(nm))) | |
1339 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | |
1340 %! assert (nm.word1,'ShoRt') | |
1341 %! assert (nm.word2,'Test') | |
5582 | 1342 |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1343 %!test |
7242 | 1344 %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); |
1345 %! assert (s,1) | |
1346 %! assert (e,10) | |
1347 %! assert (size(te), [1,1]) | |
1348 %! assert (te{1}, [1 5; 7, 10]) | |
1349 %! assert (m{1},'ShoRt Test') | |
1350 %! assert (size(t),[1,1]) | |
1351 %! assert (t{1}{1},'ShoRt') | |
1352 %! assert (t{1}{2},'Test') | |
1353 %! assert (size(nm), [1,1]) | |
1354 %! assert (!isempty(fieldnames(nm))) | |
1355 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | |
1356 %! assert (nm.word1,'ShoRt') | |
1357 %! assert (nm.word2,'Test') | |
5582 | 1358 |
5779 | 1359 %!assert(regexpi("abc\nabc",'.'),[1:7]) |
1360 %!assert(regexpi("abc\nabc",'.','dotall'),[1:7]) | |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1361 %!test |
7242 | 1362 %! assert(regexpi("abc\nabc",'(?s).'),[1:7]) |
1363 %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) | |
1364 %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) | |
5779 | 1365 |
1366 %!assert(regexpi("caseCaSe",'case'),[1,5]) | |
1367 %!assert(regexpi("caseCaSe",'case',"matchcase"),1) | |
1368 %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5]) | |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1369 %!test |
7242 | 1370 %! assert(regexpi("caseCaSe",'(?-i)case'),1) |
1371 %! assert(regexpi("caseCaSe",'(?i)case'),[1,5]) | |
5779 | 1372 |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1373 %!assert (regexpi("abc\nabc",'C$'),7) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1374 %!assert (regexpi("abc\nabc",'C$',"stringanchors"),7) |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1375 %!test |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1376 %! assert (regexpi("abc\nabc",'(?-m)C$'),7) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1377 %! assert (regexpi("abc\nabc",'C$',"lineanchors"),[3,7]) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1378 %! assert (regexpi("abc\nabc",'(?m)C$'),[3,7]) |
5779 | 1379 |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1380 %!assert (regexpi("this word",'S w'),4) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1381 %!assert (regexpi("this word",'S w','literalspacing'),4) |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1382 %!test |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1383 %! assert (regexpi("this word",'(?-x)S w','literalspacing'),4) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1384 %! assert (regexpi("this word",'S w','freespacing'),zeros(1,0)) |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1385 %! assert (regexpi("this word",'(?x)S w'),zeros(1,0)) |
5779 | 1386 |
5582 | 1387 %!error regexpi('string', 'tri', 'BadArg'); |
1388 %!error regexpi('string'); | |
1389 | |
6361 | 1390 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) |
1391 %!assert(regexpi({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)}) | |
1392 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]}) | |
1393 %!assert(regexpi('Strings',{'t','s'}),{2,[1,7]}) | |
1394 | |
5582 | 1395 */ |
1396 | |
6361 | 1397 |
1398 static octave_value | |
1399 octregexprep (const octave_value_list &args, const std::string &nm) | |
5785 | 1400 { |
6361 | 1401 octave_value retval; |
5785 | 1402 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1403 int nargin = args.length (); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1404 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1405 // Make sure we have string, pattern, replacement |
5785 | 1406 const std::string buffer = args(0).string_value (); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1407 if (error_state) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1408 return retval; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1409 |
5785 | 1410 const std::string pattern = args(1).string_value (); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1411 if (error_state) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1412 return retval; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1413 |
5785 | 1414 const std::string replacement = args(2).string_value (); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1415 if (error_state) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1416 return retval; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1417 |
5785 | 1418 // Pack options excluding 'tokenize' and various output |
1419 // reordering strings into regexp arg list | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1420 octave_value_list regexpargs (nargin-1, octave_value ()); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1421 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1422 regexpargs(0) = args (0); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1423 regexpargs(1) = args (1); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1424 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1425 int len = 2; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1426 for (int i = 3; i < nargin; i++) |
5785 | 1427 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1428 const std::string opt = args(i).string_value (); |
5785 | 1429 if (opt != "tokenize" && opt != "start" && opt != "end" |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1430 && opt != "tokenextents" && opt != "match" && opt != "tokens" |
13310
583940a28bfd
handle "split" option for regexp
John W. Eaton <jwe@octave.org>
parents:
13227
diff
changeset
|
1431 && opt != "names" && opt != "split" && opt != "warnings") |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1432 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1433 regexpargs(len++) = args(i); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1434 } |
5785 | 1435 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1436 regexpargs.resize (len); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1437 |
5785 | 1438 // Identify replacement tokens; build a vector of group numbers in |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1439 // the replacement string so that we can quickly calculate the size |
5785 | 1440 // of the replacement. |
1441 int tokens = 0; | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1442 for (size_t i=1; i < replacement.size (); i++) |
5785 | 1443 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1444 if (replacement[i-1]=='$' && isdigit (replacement[i])) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1445 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1446 tokens++; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1447 i++; |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1448 } |
5785 | 1449 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1450 std::vector<int> token (tokens); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1451 |
5785 | 1452 int kk = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1453 for (size_t i = 1; i < replacement.size (); i++) |
5785 | 1454 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1455 if (replacement[i-1]=='$' && isdigit (replacement[i])) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1456 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1457 token[kk++] = replacement[i]-'0'; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1458 i++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1459 } |
5785 | 1460 } |
1461 | |
1462 // Perform replacement | |
1463 std::string rep; | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1464 |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1465 if (tokens > 0) |
5785 | 1466 { |
1467 std::list<regexp_elem> lst; | |
1468 string_vector named; | |
1469 int nopts; | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1470 bool once; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1471 int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts, once); |
5785 | 1472 |
1473 if (error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1474 return retval; |
5785 | 1475 if (sz == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1476 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1477 retval = args(0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1478 return retval; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1479 } |
5785 | 1480 |
1481 // Determine replacement length | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1482 const size_t replen = replacement.size () - 2*tokens; |
5785 | 1483 int delta = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1484 const_iterator p = lst.begin (); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1485 for (int i = 0; i < sz; i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1486 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1487 OCTAVE_QUIT; |
5785 | 1488 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1489 const Matrix pairs (p->te); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1490 size_t pairlen = 0; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1491 for (int j = 0; j < tokens; j++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1492 { |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1493 if (token[j] == 0) |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1494 pairlen += static_cast<size_t> (p->e - p->s) + 1; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1495 else if (token[j] <= pairs.rows ()) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1496 pairlen += static_cast<size_t> (pairs(token[j]-1,1) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1497 - pairs(token[j]-1,0)) + 1; |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1498 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1499 delta += static_cast<int> (replen + pairlen) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1500 - static_cast<int> (p->e - p->s + 1); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1501 p++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1502 } |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1503 |
5785 | 1504 // Build replacement string |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1505 rep.reserve (buffer.size () + delta); |
5785 | 1506 size_t from = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1507 p = lst.begin (); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1508 for (int i = 0; i < sz; i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1509 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1510 OCTAVE_QUIT; |
5785 | 1511 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1512 const Matrix pairs (p->te); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1513 rep.append (&buffer[from], static_cast<size_t> (p->s - 1) - from); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1514 from = static_cast<size_t> (p->e - 1) + 1; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1515 for (size_t j = 1; j < replacement.size (); j++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1516 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1517 if (replacement[j-1]=='$' && isdigit (replacement[j])) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1518 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1519 int k = replacement[j]-'0'; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1520 if (k == 0) |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1521 { |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1522 // replace with entire match |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1523 rep.append (&buffer[static_cast<size_t> (p->e - 1)], |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1524 static_cast<size_t> (p->e - p->s) + 1); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1525 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1526 else if (k <= pairs.rows ()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1527 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1528 // replace with group capture |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1529 rep.append (&buffer[static_cast<size_t> (pairs(k-1,0)-1)], |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1530 static_cast<size_t> (pairs(k-1,1) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1531 - pairs(k-1,0)) + 1); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1532 } |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1533 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1534 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1535 // replace with nothing |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1536 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1537 j++; |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1538 } |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1539 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1540 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1541 rep.append (1, replacement[j-1]); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1542 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1543 if (j+1 == replacement.size ()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1544 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1545 rep.append (1, replacement[j]); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1546 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1547 } |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1548 p++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1549 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1550 rep.append (&buffer[from], buffer.size () - from); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1551 } |
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1552 else |
5785 | 1553 { |
1554 std::list<regexp_elem> lst; | |
1555 string_vector named; | |
1556 int nopts; | |
7893
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1557 bool once; |
eb9ccb44ea41
make regexp(...,'once') matlab compatible
Jaroslav Hajek <highegg@gmail.com>
parents:
7520
diff
changeset
|
1558 int sz = octregexp_list (regexpargs, nm, false, lst, named, nopts, once); |
5785 | 1559 |
1560 if (error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1561 return retval; |
5785 | 1562 if (sz == 0) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1563 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1564 retval = args (0); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1565 return retval; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1566 } |
5785 | 1567 |
1568 // Determine replacement length | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1569 const size_t replen = replacement.size (); |
5785 | 1570 int delta = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1571 const_iterator p = lst.begin (); |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1572 for (int i = 0; i < sz; i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1573 { |
5785 | 1574 OCTAVE_QUIT; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1575 delta += static_cast<int> (replen) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1576 - static_cast<int> (p->e - p->s + 1); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1577 p++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1578 } |
5785 | 1579 |
1580 // Build replacement string | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1581 rep.reserve (buffer.size () + delta); |
5785 | 1582 size_t from = 0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1583 p = lst.begin (); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1584 for (int i = 0; i < sz; i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1585 { |
5785 | 1586 OCTAVE_QUIT; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1587 rep.append (&buffer[from], static_cast<size_t> (p->s - 1) - from); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1588 from = static_cast<size_t> (p->e - 1) + 1; |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1589 rep.append (replacement); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1590 p++; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1591 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1592 rep.append (&buffer[from], buffer.size () - from); |
5785 | 1593 } |
11586
12df7854fa7c
strip trailing whitespace from source files
John W. Eaton <jwe@octave.org>
parents:
11572
diff
changeset
|
1594 |
6361 | 1595 retval = rep; |
1596 return retval; | |
1597 } | |
1598 | |
6549 | 1599 DEFUN_DLD (regexprep, args, , |
6361 | 1600 "-*- texinfo -*-\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1601 @deftypefn {Loadable Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1602 @deftypefnx {Loadable Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, \"@var{opt1}\", @dots{})\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1603 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.\n\ |
6361 | 1604 \n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1605 The pattern is a regular expression as documented for @code{regexp}.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1606 @xref{doc-regexp,,regexp}.\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1607 \n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1608 The replacement string may contain @code{$i}, which substitutes\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1609 for the ith set of parentheses in the match string. For example,\n\ |
10840 | 1610 \n\ |
6361 | 1611 @example\n\ |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1612 regexprep(\"Bill Dunn\",'(\\w+) (\\w+)','$2, $1')\n\ |
6361 | 1613 @end example\n\ |
10840 | 1614 \n\ |
10846
a4f482e66b65
Grammarcheck more of the documentation.
Rik <octave@nomad.inbox5.com>
parents:
10840
diff
changeset
|
1615 @noindent\n\ |
6361 | 1616 returns \"Dunn, Bill\"\n\ |
1617 \n\ | |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1618 Options in addition to those of @code{regexp} are\n\ |
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1619 \n\ |
6361 | 1620 @table @samp\n\ |
1621 \n\ | |
1622 @item once\n\ | |
7001 | 1623 Replace only the first occurrence of @var{pat} in the result.\n\ |
6361 | 1624 \n\ |
1625 @item warnings\n\ | |
1626 This option is present for compatibility but is ignored.\n\ | |
1627 \n\ | |
1628 @end table\n\ | |
11572
7d6d8c1e471f
Grammarcheck Texinfo for files in src directory.
Rik <octave@nomad.inbox5.com>
parents:
11553
diff
changeset
|
1629 @seealso{regexp, regexpi, strrep}\n\ |
6361 | 1630 @end deftypefn") |
1631 { | |
1632 octave_value_list retval; | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1633 int nargin = args.length (); |
6361 | 1634 |
1635 if (nargin < 3) | |
1636 { | |
1637 print_usage (); | |
1638 return retval; | |
1639 } | |
1640 | |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1641 if (args(0).is_cell () || args(1).is_cell () || args(2).is_cell ()) |
6361 | 1642 { |
1643 Cell str; | |
1644 Cell pat; | |
1645 Cell rep; | |
6495 | 1646 dim_vector dv0; |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1647 dim_vector dv1 (1, 1); |
6361 | 1648 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1649 if (args(0).is_cell ()) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1650 str = args(0).cell_value (); |
6361 | 1651 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1652 str = Cell (args(0)); |
6361 | 1653 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1654 if (args(1).is_cell ()) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1655 pat = args(1).cell_value (); |
6361 | 1656 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1657 pat = Cell (args(1)); |
6361 | 1658 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1659 if (args(2).is_cell ()) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1660 rep = args(2).cell_value (); |
6361 | 1661 else |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1662 rep = Cell (args(2)); |
6361 | 1663 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1664 dv0 = str.dims (); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1665 if (pat.numel () != 1) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1666 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1667 dv1 = pat.dims (); |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1668 if (rep.numel () != 1 && dv1 != rep.dims ()) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1669 error ("regexprep: Inconsistent cell array dimensions"); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1670 } |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1671 else if (rep.numel () != 1) |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1672 dv1 = rep.dims (); |
6361 | 1673 |
1674 if (!error_state) | |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1675 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1676 Cell ret (dv0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1677 octave_value_list new_args = args; |
6361 | 1678 |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1679 for (octave_idx_type i = 0; i < dv0.numel (); i++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1680 { |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1681 new_args(0) = str(i); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1682 if (pat.numel() == 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1683 new_args(1) = pat(0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1684 if (rep.numel() == 1) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1685 new_args(2) = rep(0); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1686 |
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1687 for (octave_idx_type j = 0; j < dv1.numel (); j++) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1688 { |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1689 if (pat.numel () != 1) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1690 new_args(1) = pat(j); |
13227
9559417aa965
maint: regexp.cc style fixes
John W. Eaton <jwe@octave.org>
parents:
12642
diff
changeset
|
1691 if (rep.numel () != 1) |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1692 new_args(2) = rep(j); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1693 new_args(0) = octregexprep (new_args, "regexprep"); |
6361 | 1694 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1695 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1696 break; |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1697 } |
6361 | 1698 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1699 if (error_state) |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1700 break; |
6495 | 1701 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1702 ret(i) = new_args(0); |
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1703 } |
6361 | 1704 |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1705 if (!error_state) |
13311
d590d9df5596
regexprep: only return cell array if first arg is cell array
John W. Eaton <jwe@octave.org>
parents:
13310
diff
changeset
|
1706 retval = args(0).is_cell () |
d590d9df5596
regexprep: only return cell array if first arg is cell array
John W. Eaton <jwe@octave.org>
parents:
13310
diff
changeset
|
1707 ? octave_value (ret) : octave_value (ret(0)); |
10154
40dfc0c99116
DLD-FUNCTIONS/*.cc: untabify
John W. Eaton <jwe@octave.org>
parents:
9064
diff
changeset
|
1708 } |
6361 | 1709 } |
1710 else | |
1711 retval = octregexprep (args, "regexprep"); | |
1712 | |
5785 | 1713 return retval; |
1714 } | |
1715 | |
1716 /* | |
1717 %!test # Replace with empty | |
1718 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; | |
1719 %! t = regexprep(xml,'<[!?][^>]*>',''); | |
1720 %! assert(t,' <tag v="hello">some stuff</tag>') | |
1721 | |
1722 %!test # Replace with non-empty | |
1723 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; | |
1724 %! t = regexprep(xml,'<[!?][^>]*>','?'); | |
1725 %! assert(t,'? <tag v="hello">some stuff?</tag>') | |
1726 | |
1727 %!test # Check that 'tokenize' is ignored | |
1728 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; | |
1729 %! t = regexprep(xml,'<[!?][^>]*>','','tokenize'); | |
1730 %! assert(t,' <tag v="hello">some stuff</tag>') | |
1731 | |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
1732 ## Test capture replacement |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1733 %!test |
7242 | 1734 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins"; |
1735 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam"; | |
1736 %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1'); | |
1737 %! assert(t,result) | |
5785 | 1738 |
11032
c9b0a75b02e8
Make all regexp in Octave compatible with both POSIX and PCRE.
Rik <octave@nomad.inbox5.com>
parents:
11025
diff
changeset
|
1739 ## Return the original if no match |
5785 | 1740 %!assert(regexprep('hello','world','earth'),'hello') |
1741 | |
1742 ## Test a general replacement | |
1743 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g"); | |
1744 | |
1745 ## Make sure it works at the beginning and end | |
1746 %!assert(regexprep("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g"); | |
1747 %!assert(regexprep("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_"); | |
1748 | |
1749 ## Options | |
1750 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), "a_b]c{d}e-f=g"); | |
1751 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), "a_b_c_d_e_f_g"); | |
1752 | |
1753 ## Option combinations | |
1754 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), "a_b]c{d}e-f=g"); | |
1755 | |
1756 ## End conditions on replacement | |
1757 %!assert(regexprep("abc","(b)",".$1"),"a.bc"); | |
1758 %!assert(regexprep("abc","(b)","$1"),"abc"); | |
1759 %!assert(regexprep("abc","(b)","$1."),"ab.c"); | |
1760 %!assert(regexprep("abc","(b)","$1.."),"ab..c"); | |
1761 | |
6361 | 1762 ## Test cell array arguments |
13311
d590d9df5596
regexprep: only return cell array if first arg is cell array
John W. Eaton <jwe@octave.org>
parents:
13310
diff
changeset
|
1763 %!assert(regexprep("abc",{"b","a"},"?"),"??c") |
6361 | 1764 %!assert(regexprep({"abc","cba"},"b","?"),{"a?c","c?a"}) |
6503 | 1765 %!assert(regexprep({"abc","cba"},{"b","a"},{"?","!"}),{"!?c","c?!"}) |
6361 | 1766 |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
1767 # Nasty lookbehind expression |
12462
e4dbfe3019b1
Use PCRE regular expressions throughout Octave.
Rik <octave@nomad.inbox5.com>
parents:
12433
diff
changeset
|
1768 %!test |
11025
df2152514429
Update docstrings for regular expression functions
Rik <octave@nomad.inbox5.com>
parents:
11018
diff
changeset
|
1769 %! assert(regexprep('x^(-1)+y(-1)+z(-1)=0','(?<=[a-z]+)\(\-[1-9]*\)','_minus1'),'x^(-1)+y_minus1+z_minus1=0') |
8093
dcc31f473596
Treat PCRE lookbehind operators in a manner that is approximately correct
David Bateman <dbateman@free.fr>
parents:
8021
diff
changeset
|
1770 |
5785 | 1771 */ |