Mercurial > jwe > octave
annotate liboctave/util/lo-regexp.cc @ 30569:796f54d4ddbf stable
update Octave Project Developers copyright for the new year
In files that have the "Octave Project Developers" copyright notice,
update for 2022.
In all .txi and .texi files except gpl.txi and gpl.texi in the
doc/liboctave and doc/interpreter directories, change the copyright
to "Octave Project Developers", the same as used for other source
files. Update copyright notices for 2022 (not done since 2019). For
gpl.txi and gpl.texi, change the copyright notice to be "Free Software
Foundation, Inc." and leave the date at 2007 only because this file
only contains the text of the GPL, not anything created by the Octave
Project Developers.
Add Paul Thomas to contributors.in.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Tue, 28 Dec 2021 18:22:40 -0500 |
parents | f3f3e3793fb5 |
children | 5cf18ef0377c |
rev | line source |
---|---|
27928
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
1 //////////////////////////////////////////////////////////////////////// |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
2 // |
30569
796f54d4ddbf
update Octave Project Developers copyright for the new year
John W. Eaton <jwe@octave.org>
parents:
30399
diff
changeset
|
3 // Copyright (C) 2002-2022 The Octave Project Developers |
27928
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
4 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
5 // See the file COPYRIGHT.md in the top-level directory of this |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
6 // distribution or <https://octave.org/copyright/>. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
7 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
8 // This file is part of Octave. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
9 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
10 // Octave is free software: you can redistribute it and/or modify it |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
11 // under the terms of the GNU General Public License as published by |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
12 // the Free Software Foundation, either version 3 of the License, or |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
13 // (at your option) any later version. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
14 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
15 // Octave is distributed in the hope that it will be useful, but |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
16 // WITHOUT ANY WARRANTY; without even the implied warranty of |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
18 // GNU General Public License for more details. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
19 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
20 // You should have received a copy of the GNU General Public License |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
21 // along with Octave; see the file COPYING. If not, see |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
22 // <https://www.gnu.org/licenses/>. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
23 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27924
diff
changeset
|
24 //////////////////////////////////////////////////////////////////////// |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
25 |
21724
aba2e6293dd8
use "#if ..." consistently instead of "#ifdef" and "#ifndef"
John W. Eaton <jwe@octave.org>
parents:
21301
diff
changeset
|
26 #if defined (HAVE_CONFIG_H) |
21301
40de9f8f23a6
Use '#include "config.h"' rather than <config.h>.
Rik <rik@octave.org>
parents:
21202
diff
changeset
|
27 # include "config.h" |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
28 #endif |
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
29 |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
30 #include <list> |
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
31 #include <sstream> |
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
32 #include <string> |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
33 #include <vector> |
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
34 |
14025
9867be070ee1
use pcre/pcre.h if it is present
John W. Eaton <jwe@octave.org>
parents:
14024
diff
changeset
|
35 #if defined (HAVE_PCRE_H) |
21202
f7121e111991
maint: indent #ifdef blocks in liboctave and src directories.
Rik <rik@octave.org>
parents:
21136
diff
changeset
|
36 # include <pcre.h> |
14025
9867be070ee1
use pcre/pcre.h if it is present
John W. Eaton <jwe@octave.org>
parents:
14024
diff
changeset
|
37 #elif defined (HAVE_PCRE_PCRE_H) |
21202
f7121e111991
maint: indent #ifdef blocks in liboctave and src directories.
Rik <rik@octave.org>
parents:
21136
diff
changeset
|
38 # include <pcre/pcre.h> |
14025
9867be070ee1
use pcre/pcre.h if it is present
John W. Eaton <jwe@octave.org>
parents:
14024
diff
changeset
|
39 #endif |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
40 |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
41 #include "Matrix.h" |
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
42 #include "base-list.h" |
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
43 #include "lo-error.h" |
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
44 #include "oct-locbuf.h" |
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
45 #include "quit.h" |
16940
99122191d3dd
maint: Rename regexp.h to lo-regexp.h, regexp.cc to lo-regexp.cc in liboctave.
Rik <rik@octave.org>
parents:
15819
diff
changeset
|
46 #include "lo-regexp.h" |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
47 #include "str-vec.h" |
27528
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
48 #include "unistr-wrappers.h" |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
49 |
27105
097774bed4ed
move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents:
26379
diff
changeset
|
50 namespace octave |
097774bed4ed
move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents:
26379
diff
changeset
|
51 { |
097774bed4ed
move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents:
26379
diff
changeset
|
52 // Define the maximum number of retries for a pattern |
097774bed4ed
move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents:
26379
diff
changeset
|
53 // that possibly results in an infinite recursion. |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
54 #define PCRE_MATCHLIMIT_MAX 10 |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
55 |
27105
097774bed4ed
move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents:
26379
diff
changeset
|
56 // FIXME: should this be configurable? |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
57 #define MAXLOOKBEHIND 10 |
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
58 |
27105
097774bed4ed
move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents:
26379
diff
changeset
|
// Set to true the first time the "Octave:regexp-lookbehind-limit" warning
// is issued while rewriting a pattern, so that the warning is emitted at
// most once rather than for every variable-length lookbehind encountered.
59 static bool lookbehind_warned = false; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
60 |
27105
097774bed4ed
move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents:
26379
diff
changeset
|
61 // FIXME: don't bother collecting and composing return values |
097774bed4ed
move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents:
26379
diff
changeset
|
62 // the user doesn't want. |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
63 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
64 void |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
65 regexp::free (void) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
66 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
67 if (m_data) |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
68 pcre_free (static_cast<pcre *> (m_data)); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
69 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
70 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
71 void |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
72 regexp::compile_internal (void) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
73 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
74 // If we had a previously compiled pattern, release it. |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
75 free (); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
76 |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
77 std::size_t max_length = MAXLOOKBEHIND; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
78 |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
79 std::size_t pos = 0; |
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
80 std::size_t new_pos; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
81 int inames = 0; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
82 std::ostringstream buf; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
83 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
84 while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
85 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
86 if (m_pattern.at (new_pos + 2) == '<' |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
87 && !(m_pattern.at (new_pos + 3) == '=' |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
88 || m_pattern.at (new_pos + 3) == '!')) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
89 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
90 // The syntax of named tokens in pcre is "(?P<name>...)" while |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
91 // we need a syntax "(?<name>...)", so fix that here. Also an |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
92 // expression like |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
93 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)" |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
94 // should be perfectly legal, while pcre does not allow the same |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
95 // named token name on both sides of the alternative. Also fix |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
96 // that here by replacing name tokens by dummy names, and dealing |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
97 // with the dummy names later. |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
98 |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
99 std::size_t tmp_pos = m_pattern.find_first_of ('>', new_pos); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
100 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
101 if (tmp_pos == std::string::npos) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
102 (*current_liboctave_error_handler) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
103 ("regexp: syntax error in pattern"); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
104 |
27281
db687716fed6
style fixes: generally aim to break long lines before operators, not after
John W. Eaton <jwe@octave.org>
parents:
27105
diff
changeset
|
105 std::string tmp_name |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
106 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
107 |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
108 bool found = false; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
109 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
110 for (int i = 0; i < m_names; i++) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
111 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
112 if (m_named_pats(i) == tmp_name) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
113 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
114 m_named_idx.resize (dim_vector (inames+1, 1)); |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
115 m_named_idx(inames) = i; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
116 found = true; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
117 break; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
118 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
119 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
120 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
121 if (! found) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
122 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
123 m_named_idx.resize (dim_vector (inames+1, 1)); |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
124 m_named_idx(inames) = m_names; |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
125 m_named_pats.append (tmp_name); |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
126 m_names++; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
127 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
128 |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
129 if (new_pos - pos > 0) |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
130 buf << m_pattern.substr (pos, new_pos-pos); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
131 if (inames < 10) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
132 buf << "(?P<n00" << inames++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
133 else if (inames < 100) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
134 buf << "(?P<n0" << inames++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
135 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
136 buf << "(?P<n" << inames++; |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
137 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
138 pos = tmp_pos; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
139 } |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
140 else if (m_pattern.at (new_pos + 2) == '<') |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
141 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
142 // Find lookbehind operators of arbitrary length (ie like |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
143 // "(?<=[a-z]*)") and replace with a maximum length operator |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
144 // as PCRE can not yet handle arbitrary length lookbehind |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
145 // operators. Use the string length as the maximum length to |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
146 // avoid issues. |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
147 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
148 int brackets = 1; |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
149 std::size_t tmp_pos1 = new_pos + 2; |
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
150 std::size_t tmp_pos2 = tmp_pos1; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
151 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
152 while (tmp_pos1 < m_pattern.length () && brackets > 0) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
153 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
154 char ch = m_pattern.at (tmp_pos1); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
155 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
156 if (ch == '(') |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
157 brackets++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
158 else if (ch == ')') |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
159 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
160 if (brackets > 1) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
161 tmp_pos2 = tmp_pos1; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
162 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
163 brackets--; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
164 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
165 |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
166 tmp_pos1++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
167 } |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
168 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
169 if (brackets != 0) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
170 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
171 buf << m_pattern.substr (pos, new_pos - pos) << "(?"; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
172 pos = new_pos + 2; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
173 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
174 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
175 { |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
176 std::size_t tmp_pos3 = m_pattern.find_first_of ("*+", tmp_pos2); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
177 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
178 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
179 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
180 if (! lookbehind_warned) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
181 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
182 lookbehind_warned = true; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
183 (*current_liboctave_warning_with_id_handler) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
184 ("Octave:regexp-lookbehind-limit", |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
185 "%s: arbitrary length lookbehind patterns are only supported up to length %d", |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
186 m_who.c_str (), MAXLOOKBEHIND); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
187 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
188 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
189 buf << m_pattern.substr (pos, new_pos - pos) << '('; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
190 |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
191 std::size_t i; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
192 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
193 if (m_pattern.at (tmp_pos3) == '*') |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
194 i = 0; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
195 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
196 i = 1; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
197 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
198 for (; i < max_length + 1; i++) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
199 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
200 buf << m_pattern.substr (new_pos, tmp_pos3 - new_pos) |
23807
336f89b6208b
Use character literals 'c' rather than string literals "c" when possible.
Rik <rik@octave.org>
parents:
23795
diff
changeset
|
201 << '{' << i << '}'; |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
202 buf << m_pattern.substr (tmp_pos3 + 1, |
27938
863ae57eee69
maint: Use Octave coding conventions in liboctave/
Rik <rik@octave.org>
parents:
27928
diff
changeset
|
203 tmp_pos1 - tmp_pos3 - 1); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
204 if (i != max_length) |
23807
336f89b6208b
Use character literals 'c' rather than string literals "c" when possible.
Rik <rik@octave.org>
parents:
23795
diff
changeset
|
205 buf << '|'; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
206 } |
23807
336f89b6208b
Use character literals 'c' rather than string literals "c" when possible.
Rik <rik@octave.org>
parents:
23795
diff
changeset
|
207 buf << ')'; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
208 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
209 else |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
210 buf << m_pattern.substr (pos, tmp_pos1 - pos); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
211 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
212 pos = tmp_pos1; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
213 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
214 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
215 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
216 { |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
217 buf << m_pattern.substr (pos, new_pos - pos) << "(?"; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
218 pos = new_pos + 2; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
219 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
220 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
221 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
222 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
223 buf << m_pattern.substr (pos); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
224 |
25103
078b795c5219
maint: style check C++ ahead of 4.4 release.
Rik <rik@octave.org>
parents:
25054
diff
changeset
|
225 // Replace NULLs with escape sequence because conversion function c_str() |
24741
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
226 // will terminate string early at embedded NULLs. |
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
227 std::string buf_str = buf.str (); |
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
228 while ((pos = buf_str.find ('\0')) != std::string::npos) |
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
229 buf_str.replace (pos, 1, "\\000"); |
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
230 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
231 const char *err; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
232 int erroffset; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
233 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
234 int pcre_options |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
235 = ( (m_options.case_insensitive () ? PCRE_CASELESS : 0) |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
236 | (m_options.dotexceptnewline () ? 0 : PCRE_DOTALL) |
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
237 | (m_options.lineanchors () ? PCRE_MULTILINE : 0) |
27570
74173f04d2a3
Use syntax for UTF-8 mode with PCRE that is used in the configure test (bug #35910).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27528
diff
changeset
|
238 | (m_options.freespacing () ? PCRE_EXTENDED : 0) |
74173f04d2a3
Use syntax for UTF-8 mode with PCRE that is used in the configure test (bug #35910).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27528
diff
changeset
|
239 | PCRE_UTF8); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
240 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
241 m_data = pcre_compile (buf_str.c_str (), pcre_options, |
27938
863ae57eee69
maint: Use Octave coding conventions in liboctave/
Rik <rik@octave.org>
parents:
27928
diff
changeset
|
242 &err, &erroffset, nullptr); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
243 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
244 if (! m_data) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
245 (*current_liboctave_error_handler) |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
246 ("%s: %s at position %d of expression", m_who.c_str (), err, erroffset); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
247 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
248 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
249 regexp::match_data |
29218
397d043bb2ff
Add changes missing in cf059093ffbc.
Markus Mützel <markus.muetzel@gmx.de>
parents:
29217
diff
changeset
|
250 regexp::match (const std::string& buffer) const |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
251 { |
27528
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
252 // check if input is valid utf-8 |
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
253 const uint8_t *buf_str = reinterpret_cast<const uint8_t *> (buffer.c_str ()); |
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
254 if (octave_u8_check_wrapper (buf_str, buffer.length ())) |
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
255 (*current_liboctave_error_handler) |
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
256 ("%s: the input string is invalid UTF-8", m_who.c_str ()); |
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
257 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
258 regexp::match_data retval; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
259 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
260 std::list<regexp::match_element> lst; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
261 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
262 int subpatterns; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
263 int namecount; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
264 int nameentrysize; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
265 char *nametable; |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
266 std::size_t idx = 0; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
267 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
268 pcre *re = static_cast<pcre *> (m_data); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
269 |
23795
980f39c3ab90
Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents:
23302
diff
changeset
|
270 pcre_fullinfo (re, nullptr, PCRE_INFO_CAPTURECOUNT, &subpatterns); |
980f39c3ab90
Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents:
23302
diff
changeset
|
271 pcre_fullinfo (re, nullptr, PCRE_INFO_NAMECOUNT, &namecount); |
980f39c3ab90
Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents:
23302
diff
changeset
|
272 pcre_fullinfo (re, nullptr, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); |
980f39c3ab90
Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents:
23302
diff
changeset
|
273 pcre_fullinfo (re, nullptr, PCRE_INFO_NAMETABLE, &nametable); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
274 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
275 OCTAVE_LOCAL_BUFFER (int, ovector, (subpatterns+1)*3); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
276 OCTAVE_LOCAL_BUFFER (int, nidx, namecount); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
277 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
278 for (int i = 0; i < namecount; i++) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
279 { |
23302
78cf75ffd382
maint: tweak readability of code in lo-regexp.cc.
Rik <rik@octave.org>
parents:
23084
diff
changeset
|
280 // Index of subpattern in first two bytes of name (MSB first). |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
281 // Extract index. |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
282 nidx[i] = (static_cast<int> (nametable[i*nameentrysize])) << 8 |
22402
4caa7b28d183
maint: Style check C++ code in liboctave/
Rik <rik@octave.org>
parents:
22333
diff
changeset
|
283 | static_cast<int> (nametable[i*nameentrysize+1]); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
284 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
285 |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
286 while (true) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
287 { |
22860
0b1e25cc4457
eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents:
22755
diff
changeset
|
288 octave_quit (); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
289 |
23795
980f39c3ab90
Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents:
23302
diff
changeset
|
290 int matches = pcre_exec (re, nullptr, buffer.c_str (), |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
291 buffer.length (), idx, |
27528
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
292 PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0), |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
293 ovector, (subpatterns+1)*3); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
294 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
295 if (matches == PCRE_ERROR_MATCHLIMIT) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
296 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
297 // Try harder; start with default value for MATCH_LIMIT |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
298 // and increase it. |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
299 (*current_liboctave_warning_with_id_handler) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
300 ("Octave:regexp-match-limit", |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
301 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow"); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
302 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
303 pcre_extra pe; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
304 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
305 pcre_config (PCRE_CONFIG_MATCH_LIMIT, |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
306 static_cast<void *> (&pe.match_limit)); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
307 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
308 pe.flags = PCRE_EXTRA_MATCH_LIMIT; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
309 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
310 int i = 0; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
311 while (matches == PCRE_ERROR_MATCHLIMIT |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
312 && i++ < PCRE_MATCHLIMIT_MAX) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
313 { |
22860
0b1e25cc4457
eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents:
22755
diff
changeset
|
314 octave_quit (); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
315 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
316 pe.match_limit *= 10; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
317 matches = pcre_exec (re, &pe, buffer.c_str (), |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
318 buffer.length (), idx, |
27528
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
319 PCRE_NO_UTF8_CHECK |
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
320 | (idx ? PCRE_NOTBOL : 0), |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
321 ovector, (subpatterns+1)*3); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
322 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
323 } |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
324 |
27528
19ad9150dd69
Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27384
diff
changeset
|
325 if (matches < 0 && matches != PCRE_ERROR_NOMATCH) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
326 (*current_liboctave_error_handler) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
327 ("%s: internal error calling pcre_exec; " |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
328 "error code from pcre_exec is %i", m_who.c_str (), matches); |
21136
7cac4e7458f2
maint: clean up code around calls to current_liboctave_error_handler.
Rik <rik@octave.org>
parents:
20955
diff
changeset
|
329 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
330 if (matches == PCRE_ERROR_NOMATCH) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
331 break; |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
332 else if (ovector[0] >= ovector[1] && ! m_options.emptymatch ()) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
333 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
334 // Zero length match. Skip to next char. |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
335 idx = ovector[0] + 1; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
336 if (idx < buffer.length ()) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
337 continue; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
338 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
339 break; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
340 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
341 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
342 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
343 int pos_match = 0; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
344 Matrix token_extents (matches-1, 2); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
345 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
346 for (int i = 1; i < matches; i++) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
347 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
348 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0 |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
349 && (i == 1 || ovector[2*i] != ovector[2*i-2] |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
350 || ovector[2*i-1] != ovector[2*i+1])) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
351 { |
30399
f3f3e3793fb5
maint: style check C++ files in liboctave/ ahead of 7.1 release.
Rik <rik@octave.org>
parents:
30183
diff
changeset
|
352 token_extents(pos_match, 0) = double (ovector[2*i]+1); |
f3f3e3793fb5
maint: style check C++ files in liboctave/ ahead of 7.1 release.
Rik <rik@octave.org>
parents:
30183
diff
changeset
|
353 token_extents(pos_match++, 1) = double (ovector[2*i+1]); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
354 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
355 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
356 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
357 token_extents.resize (pos_match, 2); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
358 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
359 double start = double (ovector[0]+1); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
360 double end = double (ovector[1]); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
361 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
362 const char **listptr; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
363 int status = pcre_get_substring_list (buffer.c_str (), ovector, |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
364 matches, &listptr); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
365 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
366 if (status == PCRE_ERROR_NOMEMORY) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
367 (*current_liboctave_error_handler) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
368 ("%s: cannot allocate memory in pcre_get_substring_list", |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
369 m_who.c_str ()); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
370 |
24741
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
371 // Must use explicit length constructor as match can contain '\0'. |
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
372 std::string match_string = std::string (*listptr, end - start + 1); |
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
373 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
374 string_vector tokens (pos_match); |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
375 string_vector named_tokens (m_names); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
376 int pos_offset = 0; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
377 pos_match = 0; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
378 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
379 for (int i = 1; i < matches; i++) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
380 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
381 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
382 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
383 if (i == 1 || ovector[2*i] != ovector[2*i-2] |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
384 || ovector[2*i-1] != ovector[2*i+1]) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
385 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
386 if (namecount > 0) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
387 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
388 // FIXME: Should probably do this with a map() |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
389 // rather than a linear search. However, |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
390 // the number of captured, named expressions |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
391 // is usually pretty small (< 4) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
392 for (int j = 0; j < namecount; j++) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
393 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
394 if (nidx[j] == i) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
395 { |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
396 std::size_t len = ovector[2*i+1] - ovector[2*i]; |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
397 named_tokens(m_named_idx(j)) |
27281
db687716fed6
style fixes: generally aim to break long lines before operators, not after
John W. Eaton <jwe@octave.org>
parents:
27105
diff
changeset
|
398 = std::string (*(listptr+i-pos_offset), |
db687716fed6
style fixes: generally aim to break long lines before operators, not after
John W. Eaton <jwe@octave.org>
parents:
27105
diff
changeset
|
399 len); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
400 break; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
401 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
402 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
403 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
404 |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
405 std::size_t len = ovector[2*i+1] - ovector[2*i]; |
24741
00dfa167c1fe
Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents:
24607
diff
changeset
|
406 tokens(pos_match++) = std::string (*(listptr+i), len); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
407 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
408 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
409 pos_offset++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
410 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
411 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
412 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
413 pcre_free_substring_list (listptr); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
414 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
415 regexp::match_element new_elem (named_tokens, tokens, match_string, |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
416 token_extents, start, end); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
417 lst.push_back (new_elem); |
14536
6d5c951ec520
Add 'emptymatch', 'noemptymatch' options to regular expressions.
Rik <octave@nomad.inbox5.com>
parents:
14506
diff
changeset
|
418 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
419 if (ovector[1] <= ovector[0]) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
420 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
421 // Zero length match. Skip to next char. |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
422 idx = ovector[0] + 1; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
423 if (idx <= buffer.length ()) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
424 continue; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
425 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
426 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
427 idx = ovector[1]; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
428 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
429 if (m_options.once () || idx >= buffer.length ()) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
430 break; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
431 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
432 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
433 |
27384
3db033e86376
use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents:
27372
diff
changeset
|
434 retval = regexp::match_data (lst, m_named_pats); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
435 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
436 return retval; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
437 } |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
438 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
439 bool |
29217
cf059093ffbc
stat: Make regular expression for Windows a static const (bug #59706).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27938
diff
changeset
|
440 regexp::is_match (const std::string& buffer) const |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
441 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
442 regexp::match_data rx_lst = match (buffer); |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
443 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
444 return rx_lst.size () > 0; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
445 } |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
446 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
447 Array<bool> |
29217
cf059093ffbc
stat: Make regular expression for Windows a static const (bug #59706).
Markus Mützel <markus.muetzel@gmx.de>
parents:
27938
diff
changeset
|
448 regexp::is_match (const string_vector& buffer) const |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
449 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
450 octave_idx_type len = buffer.numel (); |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
451 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
452 Array<bool> retval (dim_vector (len, 1)); |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
453 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
454 for (octave_idx_type i = 0; i < buffer.numel (); i++) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
455 retval(i) = is_match (buffer(i)); |
7779
791231dac333
Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff
changeset
|
456 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
457 return retval; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
458 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
459 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
// Declare rep_token_t used in processing replacement string.
//
// Each instance records one "$N" reference found in the replacement
// string.  Members carry default initializers so that a
// default-constructed token never holds indeterminate values.
struct rep_token_t
{
  // Index of the '$' character within the replacement string.
  std::size_t pos = 0;

  // Value of the single digit following the '$' (0-9).  Zero refers to
  // the entire match; larger values select a capture group.
  int num = 0;
};
15541
9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents:
15271
diff
changeset
|
466 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
467 std::string |
29218
397d043bb2ff
Add changes missing in cf059093ffbc.
Markus Mützel <markus.muetzel@gmx.de>
parents:
29217
diff
changeset
|
468 regexp::replace (const std::string& buffer, |
397d043bb2ff
Add changes missing in cf059093ffbc.
Markus Mützel <markus.muetzel@gmx.de>
parents:
29217
diff
changeset
|
469 const std::string& replacement) const |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
470 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
471 std::string retval; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
472 |
25342
416856765a55
be more careful with using auto in place of explicit const iterator decls
John W. Eaton <jwe@octave.org>
parents:
25337
diff
changeset
|
473 const regexp::match_data rx_lst = match (buffer); |
15541
9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents:
15271
diff
changeset
|
474 |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
475 std::size_t num_matches = rx_lst.size (); |
15541
9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents:
15271
diff
changeset
|
476 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
477 if (num_matches == 0) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
478 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
479 retval = buffer; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
480 return retval; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
481 } |
15541
9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents:
15271
diff
changeset
|
482 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
483 // Identify replacement tokens; build a vector of group numbers in |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
484 // the replacement string so that we can quickly calculate the size |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
485 // of the replacement. |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
486 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
487 // FIXME: All code assumes that only 10 tokens ($0-$9) exist. |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
488 // $11 represents $1 followed by the character '1' rather than |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
489 // the eleventh capture buffer. |
15541
9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents:
15271
diff
changeset
|
490 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
491 std::string repstr = replacement; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
492 std::vector<rep_token_t> tokens; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
493 tokens.reserve (5); // Reserve memory for 5 pattern replacements |
15541
9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents:
15271
diff
changeset
|
494 |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
495 for (std::size_t i=0; i < repstr.size (); i++) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
496 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
497 if (repstr[i] == '\\') |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
498 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
499 if (i < repstr.size () - 1 && repstr[i+1] == '$') |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
500 { |
30399
f3f3e3793fb5
maint: style check C++ files in liboctave/ ahead of 7.1 release.
Rik <rik@octave.org>
parents:
30183
diff
changeset
|
501 repstr.erase (i, 1); // erase backslash |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
502 i++; // skip over '$' |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
503 continue; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
504 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
505 if (i < repstr.size () - 1 && repstr[i+1] == '\\') |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
506 { |
30399
f3f3e3793fb5
maint: style check C++ files in liboctave/ ahead of 7.1 release.
Rik <rik@octave.org>
parents:
30183
diff
changeset
|
507 repstr.erase (i, 1); // erase 1st backslash |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
508 continue; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
509 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
510 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
511 else if (repstr[i] == '$') |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
512 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
513 if (i < repstr.size () - 1 && isdigit (repstr[i+1])) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
514 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
515 rep_token_t tmp_token; |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
516 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
517 tmp_token.pos = i; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
518 tmp_token.num = repstr[i+1]-'0'; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
519 tokens.push_back (tmp_token); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
520 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
521 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
522 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
523 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
524 std::string rep; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
525 int num_tokens = tokens.size (); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
526 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
527 if (num_tokens > 0) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
528 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
529 // Determine replacement length |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
530 const std::size_t replen = repstr.size () - 2*num_tokens; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
531 int delta = 0; |
25337
3ff9192b676e
use auto keyword to declare iterator variables where possible
John W. Eaton <jwe@octave.org>
parents:
25166
diff
changeset
|
532 auto p = rx_lst.begin (); |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
533 for (std::size_t i = 0; i < num_matches; i++) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
534 { |
22860
0b1e25cc4457
eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents:
22755
diff
changeset
|
535 octave_quit (); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
536 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
537 double start = p->start (); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
538 double end = p->end (); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
539 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
540 const Matrix pairs (p->token_extents ()); |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
541 std::size_t pairlen = 0; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
542 for (int j = 0; j < num_tokens; j++) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
543 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
544 if (tokens[j].num == 0) |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
545 pairlen += static_cast<std::size_t> (end - start + 1); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
546 else if (tokens[j].num <= pairs.rows ()) |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
547 pairlen += static_cast<std::size_t> (pairs(tokens[j].num-1,1) |
24605
3e17190dfaea
Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents:
23083
diff
changeset
|
548 - pairs(tokens[j].num-1,0) |
3e17190dfaea
Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents:
23083
diff
changeset
|
549 + 1); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
550 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
551 delta += (static_cast<int> (replen + pairlen) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
552 - static_cast<int> (end - start + 1)); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
553 p++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
554 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
555 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
556 // Build replacement string |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
557 rep.reserve (buffer.size () + delta); |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
558 std::size_t from = 0; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
559 p = rx_lst.begin (); |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
560 for (std::size_t i = 0; i < num_matches; i++) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
561 { |
22860
0b1e25cc4457
eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents:
22755
diff
changeset
|
562 octave_quit (); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
563 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
564 double start = p->start (); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
565 double end = p->end (); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
566 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
567 const Matrix pairs (p->token_extents ()); |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
568 rep.append (&buffer[from], static_cast<std::size_t> (start - 1 - from)); |
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
569 from = static_cast<std::size_t> (end); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
570 |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
571 std::size_t cur_pos = 0; |
15541
9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents:
15271
diff
changeset
|
572 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
573 for (int j = 0; j < num_tokens; j++) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
574 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
575 rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
576 cur_pos = tokens[j].pos+2; |
15541
9db32cabeacf
Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents:
15271
diff
changeset
|
577 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
578 int k = tokens[j].num; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
579 if (k == 0) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
580 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
581 // replace with entire match |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
582 rep.append (&buffer[static_cast<std::size_t> (end - 1)], |
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
583 static_cast<std::size_t> (end - start + 1)); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
584 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
585 else if (k <= pairs.rows ()) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
586 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
587 // replace with group capture |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
588 rep.append (&buffer[static_cast<std::size_t> (pairs(k-1,0)-1)], |
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
589 static_cast<std::size_t> (pairs(k-1,1) |
24605
3e17190dfaea
Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents:
23083
diff
changeset
|
590 - pairs(k-1,0) + 1)); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
591 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
592 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
593 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
594 // replace with nothing |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
595 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
596 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
597 if (cur_pos < repstr.size ()) |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
598 rep.append (&repstr[cur_pos], repstr.size () - cur_pos); |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
599 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
600 p++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
601 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
602 rep.append (&buffer[from], buffer.size () - from); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
603 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
604 else |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
605 { |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
606 // Determine repstr length |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
607 const std::size_t replen = repstr.size (); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
608 int delta = 0; |
25337
3ff9192b676e
use auto keyword to declare iterator variables where possible
John W. Eaton <jwe@octave.org>
parents:
25166
diff
changeset
|
609 auto p = rx_lst.begin (); |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
610 for (std::size_t i = 0; i < num_matches; i++) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
611 { |
22860
0b1e25cc4457
eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents:
22755
diff
changeset
|
612 octave_quit (); |
0b1e25cc4457
eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents:
22755
diff
changeset
|
613 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
614 delta += static_cast<int> (replen) |
22402
4caa7b28d183
maint: Style check C++ code in liboctave/
Rik <rik@octave.org>
parents:
22333
diff
changeset
|
615 - static_cast<int> (p->end () - p->start () + 1); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
616 p++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
617 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
618 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
619 // Build replacement string |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
620 rep.reserve (buffer.size () + delta); |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
621 std::size_t from = 0; |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
622 p = rx_lst.begin (); |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
623 for (std::size_t i = 0; i < num_matches; i++) |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
624 { |
22860
0b1e25cc4457
eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents:
22755
diff
changeset
|
625 octave_quit (); |
0b1e25cc4457
eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents:
22755
diff
changeset
|
626 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
627 rep.append (&buffer[from], |
29659
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
628 static_cast<std::size_t> (p->start () - 1 - from)); |
d13d090cb03a
use std::size_t and std::ptrdiff_t in C++ code (bug #60471)
John W. Eaton <jwe@octave.org>
parents:
29363
diff
changeset
|
629 from = static_cast<std::size_t> (p->end ()); |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
630 rep.append (repstr); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
631 p++; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
632 } |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
633 rep.append (&buffer[from], buffer.size () - from); |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
634 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
635 |
22333
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
636 retval = rep; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
637 return retval; |
2758af148ced
move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents:
22323
diff
changeset
|
638 } |
14024
fc9f204faea0
refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents:
11586
diff
changeset
|
639 } |