annotate liboctave/util/lo-regexp.cc @ 27924:1891570abac8

update Octave Project Developers copyright for the new year In files that have the "Octave Project Developers" copyright notice, update for 2020.
author John W. Eaton <jwe@octave.org>
date Mon, 06 Jan 2020 22:29:51 -0500
parents b442ec6dda5c
children bd51beb6205e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
1 /*
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
2
27924
1891570abac8 update Octave Project Developers copyright for the new year
John W. Eaton <jwe@octave.org>
parents: 27923
diff changeset
3 Copyright (C) 2002-2020 The Octave Project Developers
27923
b442ec6dda5c use centralized file for copyright info for individual contributors
John W. Eaton <jwe@octave.org>
parents: 27570
diff changeset
4
b442ec6dda5c use centralized file for copyright info for individual contributors
John W. Eaton <jwe@octave.org>
parents: 27570
diff changeset
5 See the file COPYRIGHT.md in the top-level directory of this distribution
b442ec6dda5c use centralized file for copyright info for individual contributors
John W. Eaton <jwe@octave.org>
parents: 27570
diff changeset
6 or <https://octave.org/COPYRIGHT.html/>.
b442ec6dda5c use centralized file for copyright info for individual contributors
John W. Eaton <jwe@octave.org>
parents: 27570
diff changeset
7
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
8
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
9 This file is part of Octave.
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
10
24534
194eb4bd202b maint: Update punctuation for GPL v3 license text.
Rik <rik@octave.org>
parents: 23807
diff changeset
11 Octave is free software: you can redistribute it and/or modify it
22755
3a2b891d0b33 maint: Standardize Copyright formatting.
Rik <rik@octave.org>
parents: 22402
diff changeset
12 under the terms of the GNU General Public License as published by
24534
194eb4bd202b maint: Update punctuation for GPL v3 license text.
Rik <rik@octave.org>
parents: 23807
diff changeset
13 the Free Software Foundation, either version 3 of the License, or
22755
3a2b891d0b33 maint: Standardize Copyright formatting.
Rik <rik@octave.org>
parents: 22402
diff changeset
14 (at your option) any later version.
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
15
22755
3a2b891d0b33 maint: Standardize Copyright formatting.
Rik <rik@octave.org>
parents: 22402
diff changeset
16 Octave is distributed in the hope that it will be useful, but
3a2b891d0b33 maint: Standardize Copyright formatting.
Rik <rik@octave.org>
parents: 22402
diff changeset
17 WITHOUT ANY WARRANTY; without even the implied warranty of
3a2b891d0b33 maint: Standardize Copyright formatting.
Rik <rik@octave.org>
parents: 22402
diff changeset
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3a2b891d0b33 maint: Standardize Copyright formatting.
Rik <rik@octave.org>
parents: 22402
diff changeset
19 GNU General Public License for more details.
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
20
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
21 You should have received a copy of the GNU General Public License
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
22 along with Octave; see the file COPYING. If not, see
24534
194eb4bd202b maint: Update punctuation for GPL v3 license text.
Rik <rik@octave.org>
parents: 23807
diff changeset
23 <https://www.gnu.org/licenses/>.
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
24
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
25 */
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
26
21724
aba2e6293dd8 use "#if ..." consistently instead of "#ifdef" and "#ifndef"
John W. Eaton <jwe@octave.org>
parents: 21301
diff changeset
27 #if defined (HAVE_CONFIG_H)
21301
40de9f8f23a6 Use '#include "config.h"' rather than <config.h>.
Rik <rik@octave.org>
parents: 21202
diff changeset
28 # include "config.h"
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
29 #endif
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
30
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
31 #include <list>
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
32 #include <sstream>
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
33 #include <string>
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
34 #include <vector>
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
35
14025
9867be070ee1 use pcre/pcre.h if it is present
John W. Eaton <jwe@octave.org>
parents: 14024
diff changeset
36 #if defined (HAVE_PCRE_H)
21202
f7121e111991 maint: indent #ifdef blocks in liboctave and src directories.
Rik <rik@octave.org>
parents: 21136
diff changeset
37 # include <pcre.h>
14025
9867be070ee1 use pcre/pcre.h if it is present
John W. Eaton <jwe@octave.org>
parents: 14024
diff changeset
38 #elif defined (HAVE_PCRE_PCRE_H)
21202
f7121e111991 maint: indent #ifdef blocks in liboctave and src directories.
Rik <rik@octave.org>
parents: 21136
diff changeset
39 # include <pcre/pcre.h>
14025
9867be070ee1 use pcre/pcre.h if it is present
John W. Eaton <jwe@octave.org>
parents: 14024
diff changeset
40 #endif
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
41
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
42 #include "Matrix.h"
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
43 #include "base-list.h"
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
44 #include "lo-error.h"
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
45 #include "oct-locbuf.h"
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
46 #include "quit.h"
16940
99122191d3dd maint: Rename regexp.h to lo-regexp.h, regexp.cc to lo-regexp.cc in liboctave.
Rik <rik@octave.org>
parents: 15819
diff changeset
47 #include "lo-regexp.h"
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
48 #include "str-vec.h"
27528
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
49 #include "unistr-wrappers.h"
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
50
27105
097774bed4ed move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents: 26379
diff changeset
51 namespace octave
097774bed4ed move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents: 26379
diff changeset
52 {
097774bed4ed move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents: 26379
diff changeset
53 // Define the maximum number of retries for a pattern
097774bed4ed move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents: 26379
diff changeset
54 // that possibly results in an infinite recursion.
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
55 #define PCRE_MATCHLIMIT_MAX 10
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
56
27105
097774bed4ed move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents: 26379
diff changeset
57 // FIXME: should this be configurable?
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
58 #define MAXLOOKBEHIND 10
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
59
27105
097774bed4ed move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents: 26379
diff changeset
60 static bool lookbehind_warned = false;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
61
27105
097774bed4ed move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents: 26379
diff changeset
62 // FIXME: don't bother collecting and composing return values
097774bed4ed move some static functions inside octave namespace
John W. Eaton <jwe@octave.org>
parents: 26379
diff changeset
63 // the user doesn't want.
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
64
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
65 void
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
66 regexp::free (void)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
67 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
68 if (m_data)
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
69 pcre_free (static_cast<pcre *> (m_data));
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
70 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
71
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
72 void
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
73 regexp::compile_internal (void)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
74 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
75 // If we had a previously compiled pattern, release it.
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
76 free ();
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
77
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
78 size_t max_length = MAXLOOKBEHIND;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
79
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
80 size_t pos = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
81 size_t new_pos;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
82 int inames = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
83 std::ostringstream buf;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
84
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
85 while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos)
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
86 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
87 if (m_pattern.at (new_pos + 2) == '<'
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
88 && !(m_pattern.at (new_pos + 3) == '='
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
89 || m_pattern.at (new_pos + 3) == '!'))
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
90 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
91 // The syntax of named tokens in pcre is "(?P<name>...)" while
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
92 // we need a syntax "(?<name>...)", so fix that here. Also an
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
93 // expression like
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
94 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)"
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
95 // should be perfectly legal, while pcre does not allow the same
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
96 // named token name on both sides of the alternative. Also fix
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
97 // that here by replacing name tokens by dummy names, and dealing
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
98 // with the dummy names later.
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
99
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
100 size_t tmp_pos = m_pattern.find_first_of ('>', new_pos);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
101
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
102 if (tmp_pos == std::string::npos)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
103 (*current_liboctave_error_handler)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
104 ("regexp: syntax error in pattern");
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
105
27281
db687716fed6 style fixes: generally aim to break long lines before operators, not after
John W. Eaton <jwe@octave.org>
parents: 27105
diff changeset
106 std::string tmp_name
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
107 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
108
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
109 bool found = false;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
110
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
111 for (int i = 0; i < m_names; i++)
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
112 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
113 if (m_named_pats(i) == tmp_name)
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
114 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
115 m_named_idx.resize (dim_vector (inames+1, 1));
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
116 m_named_idx(inames) = i;
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
117 found = true;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
118 break;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
119 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
120 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
121
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
122 if (! found)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
123 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
124 m_named_idx.resize (dim_vector (inames+1, 1));
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
125 m_named_idx(inames) = m_names;
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
126 m_named_pats.append (tmp_name);
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
127 m_names++;
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
128 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
129
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
130 if (new_pos - pos > 0)
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
131 buf << m_pattern.substr (pos, new_pos-pos);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
132 if (inames < 10)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
133 buf << "(?P<n00" << inames++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
134 else if (inames < 100)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
135 buf << "(?P<n0" << inames++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
136 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
137 buf << "(?P<n" << inames++;
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
138
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
139 pos = tmp_pos;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
140 }
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
141 else if (m_pattern.at (new_pos + 2) == '<')
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
142 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
143 // Find lookbehind operators of arbitrary length (i.e., like
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
144 // "(?<=[a-z]*)") and replace with a maximum length operator
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
145 // as PCRE cannot yet handle arbitrary length lookbehind
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
146 // operators. Use MAXLOOKBEHIND as the maximum length to
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
147 // avoid issues.
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
148
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
149 int brackets = 1;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
150 size_t tmp_pos1 = new_pos + 2;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
151 size_t tmp_pos2 = tmp_pos1;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
152
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
153 while (tmp_pos1 < m_pattern.length () && brackets > 0)
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
154 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
155 char ch = m_pattern.at (tmp_pos1);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
156
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
157 if (ch == '(')
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
158 brackets++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
159 else if (ch == ')')
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
160 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
161 if (brackets > 1)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
162 tmp_pos2 = tmp_pos1;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
163
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
164 brackets--;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
165 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
166
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
167 tmp_pos1++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
168 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
169
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
170 if (brackets != 0)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
171 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
172 buf << m_pattern.substr (pos, new_pos - pos) << "(?";
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
173 pos = new_pos + 2;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
174 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
175 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
176 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
177 size_t tmp_pos3 = m_pattern.find_first_of ("*+", tmp_pos2);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
178
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
179 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
180 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
181 if (! lookbehind_warned)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
182 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
183 lookbehind_warned = true;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
184 (*current_liboctave_warning_with_id_handler)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
185 ("Octave:regexp-lookbehind-limit",
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
186 "%s: arbitrary length lookbehind patterns are only supported up to length %d",
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
187 m_who.c_str (), MAXLOOKBEHIND);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
188 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
189
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
190 buf << m_pattern.substr (pos, new_pos - pos) << '(';
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
191
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
192 size_t i;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
193
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
194 if (m_pattern.at (tmp_pos3) == '*')
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
195 i = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
196 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
197 i = 1;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
198
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
199 for (; i < max_length + 1; i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
200 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
201 buf << m_pattern.substr (new_pos, tmp_pos3 - new_pos)
23807
336f89b6208b Use character literals 'c' rather than string literals "c" when possible.
Rik <rik@octave.org>
parents: 23795
diff changeset
202 << '{' << i << '}';
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
203 buf << m_pattern.substr (tmp_pos3 + 1,
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
204 tmp_pos1 - tmp_pos3 - 1);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
205 if (i != max_length)
23807
336f89b6208b Use character literals 'c' rather than string literals "c" when possible.
Rik <rik@octave.org>
parents: 23795
diff changeset
206 buf << '|';
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
207 }
23807
336f89b6208b Use character literals 'c' rather than string literals "c" when possible.
Rik <rik@octave.org>
parents: 23795
diff changeset
208 buf << ')';
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
209 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
210 else
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
211 buf << m_pattern.substr (pos, tmp_pos1 - pos);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
212
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
213 pos = tmp_pos1;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
214 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
215 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
216 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
217 {
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
218 buf << m_pattern.substr (pos, new_pos - pos) << "(?";
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
219 pos = new_pos + 2;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
220 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
221
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
222 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
223
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
224 buf << m_pattern.substr (pos);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
225
25103
078b795c5219 maint: style check C++ ahead of 4.4 release.
Rik <rik@octave.org>
parents: 25054
diff changeset
226 // Replace NULLs with escape sequence because conversion function c_str()
24741
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
227 // will terminate string early at embedded NULLs.
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
228 std::string buf_str = buf.str ();
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
229 while ((pos = buf_str.find ('\0')) != std::string::npos)
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
230 buf_str.replace (pos, 1, "\\000");
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
231
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
232 const char *err;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
233 int erroffset;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
234
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
235 int pcre_options
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
236 = ( (m_options.case_insensitive () ? PCRE_CASELESS : 0)
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
237 | (m_options.dotexceptnewline () ? 0 : PCRE_DOTALL)
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
238 | (m_options.lineanchors () ? PCRE_MULTILINE : 0)
27570
74173f04d2a3 Use syntax for UTF-8 mode with PCRE that is used in the configure test (bug #35910).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27528
diff changeset
239 | (m_options.freespacing () ? PCRE_EXTENDED : 0)
74173f04d2a3 Use syntax for UTF-8 mode with PCRE that is used in the configure test (bug #35910).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27528
diff changeset
240 | PCRE_UTF8);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
241
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
242 m_data = pcre_compile (buf_str.c_str (), pcre_options,
24603
845ec6f4fb96 Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents: 24534
diff changeset
243 &err, &erroffset, nullptr);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
244
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
245 if (! m_data)
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
246 (*current_liboctave_error_handler)
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
247 ("%s: %s at position %d of expression", m_who.c_str (), err, erroffset);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
248 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
249
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
250 regexp::match_data
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
251 regexp::match (const std::string& buffer)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
252 {
27528
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
253 // check if input is valid utf-8
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
254 const uint8_t *buf_str = reinterpret_cast<const uint8_t *> (buffer.c_str ());
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
255 if (octave_u8_check_wrapper (buf_str, buffer.length ()))
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
256 (*current_liboctave_error_handler)
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
257 ("%s: the input string is invalid UTF-8", m_who.c_str ());
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
258
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
259 regexp::match_data retval;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
260
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
261 std::list<regexp::match_element> lst;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
262
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
263 int subpatterns;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
264 int namecount;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
265 int nameentrysize;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
266 char *nametable;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
267 size_t idx = 0;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
268
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
269 pcre *re = static_cast<pcre *> (m_data);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
270
23795
980f39c3ab90 Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents: 23302
diff changeset
271 pcre_fullinfo (re, nullptr, PCRE_INFO_CAPTURECOUNT, &subpatterns);
980f39c3ab90 Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents: 23302
diff changeset
272 pcre_fullinfo (re, nullptr, PCRE_INFO_NAMECOUNT, &namecount);
980f39c3ab90 Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents: 23302
diff changeset
273 pcre_fullinfo (re, nullptr, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
980f39c3ab90 Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents: 23302
diff changeset
274 pcre_fullinfo (re, nullptr, PCRE_INFO_NAMETABLE, &nametable);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
275
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
276 OCTAVE_LOCAL_BUFFER (int, ovector, (subpatterns+1)*3);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
277 OCTAVE_LOCAL_BUFFER (int, nidx, namecount);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
278
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
279 for (int i = 0; i < namecount; i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
280 {
23302
78cf75ffd382 maint: tweak readability of code in lo-regexp.cc.
Rik <rik@octave.org>
parents: 23084
diff changeset
281 // Index of subpattern in first two bytes of name (MSB first).
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
282 // Extract index.
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
283 nidx[i] = (static_cast<int> (nametable[i*nameentrysize])) << 8
22402
4caa7b28d183 maint: Style check C++ code in liboctave/
Rik <rik@octave.org>
parents: 22333
diff changeset
284 | static_cast<int> (nametable[i*nameentrysize+1]);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
285 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
286
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
287 while (true)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
288 {
22860
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
289 octave_quit ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
290
23795
980f39c3ab90 Use C++11 nullptr rather than 0 in code (bug #51565).
Rik <rik@octave.org>
parents: 23302
diff changeset
291 int matches = pcre_exec (re, nullptr, buffer.c_str (),
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
292 buffer.length (), idx,
27528
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
293 PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
294 ovector, (subpatterns+1)*3);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
295
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
296 if (matches == PCRE_ERROR_MATCHLIMIT)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
297 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
298 // Try harder; start with default value for MATCH_LIMIT
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
299 // and increase it.
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
300 (*current_liboctave_warning_with_id_handler)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
301 ("Octave:regexp-match-limit",
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
302 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
303
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
304 pcre_extra pe;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
305
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
306 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
307 static_cast<void *> (&pe.match_limit));
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
308
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
309 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
310
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
311 int i = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
312 while (matches == PCRE_ERROR_MATCHLIMIT
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
313 && i++ < PCRE_MATCHLIMIT_MAX)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
314 {
22860
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
315 octave_quit ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
316
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
317 pe.match_limit *= 10;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
318 matches = pcre_exec (re, &pe, buffer.c_str (),
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
319 buffer.length (), idx,
27528
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
320 PCRE_NO_UTF8_CHECK
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
321 | (idx ? PCRE_NOTBOL : 0),
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
322 ovector, (subpatterns+1)*3);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
323 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
324 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
325
27528
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27384
diff changeset
326 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
327 (*current_liboctave_error_handler)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
328 ("%s: internal error calling pcre_exec; "
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
329 "error code from pcre_exec is %i", m_who.c_str (), matches);
21136
7cac4e7458f2 maint: clean up code around calls to current_liboctave_error_handler.
Rik <rik@octave.org>
parents: 20955
diff changeset
330
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
331 if (matches == PCRE_ERROR_NOMATCH)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
332 break;
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
333 else if (ovector[0] >= ovector[1] && ! m_options.emptymatch ())
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
334 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
335 // Zero length match. Skip to next char.
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
336 idx = ovector[0] + 1;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
337 if (idx < buffer.length ())
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
338 continue;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
339 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
340 break;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
341 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
342 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
343 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
344 int pos_match = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
345 Matrix token_extents (matches-1, 2);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
346
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
347 for (int i = 1; i < matches; i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
348 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
349 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
350 && (i == 1 || ovector[2*i] != ovector[2*i-2]
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
351 || ovector[2*i-1] != ovector[2*i+1]))
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
352 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
353 token_extents(pos_match,0) = double (ovector[2*i]+1);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
354 token_extents(pos_match++,1) = double (ovector[2*i+1]);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
355 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
356 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
357
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
358 token_extents.resize (pos_match, 2);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
359
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
360 double start = double (ovector[0]+1);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
361 double end = double (ovector[1]);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
362
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
363 const char **listptr;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
364 int status = pcre_get_substring_list (buffer.c_str (), ovector,
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
365 matches, &listptr);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
366
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
367 if (status == PCRE_ERROR_NOMEMORY)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
368 (*current_liboctave_error_handler)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
369 ("%s: cannot allocate memory in pcre_get_substring_list",
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
370 m_who.c_str ());
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
371
24741
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
372 // Must use explicit length constructor as match can contain '\0'.
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
373 std::string match_string = std::string (*listptr, end - start + 1);
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
374
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
375 string_vector tokens (pos_match);
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
376 string_vector named_tokens (m_names);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
377 int pos_offset = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
378 pos_match = 0;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
379
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
380 for (int i = 1; i < matches; i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
381 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
382 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
383 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
384 if (i == 1 || ovector[2*i] != ovector[2*i-2]
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
385 || ovector[2*i-1] != ovector[2*i+1])
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
386 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
387 if (namecount > 0)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
388 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
389 // FIXME: Should probably do this with a map()
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
390 // rather than a linear search. However,
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
391 // the number of captured, named expressions
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
392 // is usually pretty small (< 4)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
393 for (int j = 0; j < namecount; j++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
394 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
395 if (nidx[j] == i)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
396 {
24741
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
397 size_t len = ovector[2*i+1] - ovector[2*i];
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
398 named_tokens(m_named_idx(j))
27281
db687716fed6 style fixes: generally aim to break long lines before operators, not after
John W. Eaton <jwe@octave.org>
parents: 27105
diff changeset
399 = std::string (*(listptr+i-pos_offset),
db687716fed6 style fixes: generally aim to break long lines before operators, not after
John W. Eaton <jwe@octave.org>
parents: 27105
diff changeset
400 len);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
401 break;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
402 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
403 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
404 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
405
24741
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
406 size_t len = ovector[2*i+1] - ovector[2*i];
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
407 tokens(pos_match++) = std::string (*(listptr+i), len);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
408 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
409 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
410 pos_offset++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
411 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
412 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
413
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
414 pcre_free_substring_list (listptr);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
415
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
416 regexp::match_element new_elem (named_tokens, tokens, match_string,
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
417 token_extents, start, end);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
418 lst.push_back (new_elem);
14536
6d5c951ec520 Add 'emptymatch', 'noemptymatch' options to regular expressions.
Rik <octave@nomad.inbox5.com>
parents: 14506
diff changeset
419
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
420 if (ovector[1] <= ovector[0])
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
421 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
422 // Zero length match. Skip to next char.
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
423 idx = ovector[0] + 1;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
424 if (idx <= buffer.length ())
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
425 continue;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
426 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
427 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
428 idx = ovector[1];
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
429
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
430 if (m_options.once () || idx >= buffer.length ())
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
431 break;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
432 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
433 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
434
27384
3db033e86376 use m_ prefix for data members in most liboctave/util classes
John W. Eaton <jwe@octave.org>
parents: 27372
diff changeset
435 retval = regexp::match_data (lst, m_named_pats);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
436
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
437 return retval;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
438 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
439
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
440 bool
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
441 regexp::is_match (const std::string& buffer)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
442 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
443 regexp::match_data rx_lst = match (buffer);
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
444
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
445 return rx_lst.size () > 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
446 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
447
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
448 Array<bool>
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
449 regexp::is_match (const string_vector& buffer)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
450 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
451 octave_idx_type len = buffer.numel ();
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
452
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
453 Array<bool> retval (dim_vector (len, 1));
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
454
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
455 for (octave_idx_type i = 0; i < buffer.numel (); i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
456 retval(i) = is_match (buffer(i));
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
457
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
458 return retval;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
459 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
460
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
// Record describing one "$N" token found while scanning the
// replacement string in regexp::replace.
struct rep_token_t
{
  // Index of the '$' character within the replacement string.
  size_t pos;

  // Value of the single digit that follows the '$' (0 through 9).
  int num;
};
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
467
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
468 std::string
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
469 regexp::replace (const std::string& buffer, const std::string& replacement)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
470 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
471 std::string retval;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
472
25342
416856765a55 be more careful with using auto in place of explicit const iterator decls
John W. Eaton <jwe@octave.org>
parents: 25337
diff changeset
473 const regexp::match_data rx_lst = match (buffer);
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
474
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
475 size_t num_matches = rx_lst.size ();
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
476
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
477 if (num_matches == 0)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
478 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
479 retval = buffer;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
480 return retval;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
481 }
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
482
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
483 // Identify replacement tokens; build a vector of group numbers in
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
484 // the replacement string so that we can quickly calculate the size
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
485 // of the replacement.
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
486
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
487 // FIXME: All code assumes that only 10 tokens ($0-$9) exist.
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
488 // $11 represents $1 followed by the character '1' rather than
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
489 // the eleventh capture buffer.
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
490
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
491 std::string repstr = replacement;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
492 std::vector<rep_token_t> tokens;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
493 tokens.reserve (5); // Reserve memory for 5 pattern replacements
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
494
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
495 for (size_t i=0; i < repstr.size (); i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
496 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
497 if (repstr[i] == '\\')
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
498 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
499 if (i < repstr.size () - 1 && repstr[i+1] == '$')
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
500 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
501 repstr.erase (i,1); // erase backslash
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
502 i++; // skip over '$'
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
503 continue;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
504 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
505 if (i < repstr.size () - 1 && repstr[i+1] == '\\')
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
506 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
507 repstr.erase (i,1); // erase 1st backslash
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
508 continue;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
509 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
510 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
511 else if (repstr[i] == '$')
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
512 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
513 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
514 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
515 rep_token_t tmp_token;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
516
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
517 tmp_token.pos = i;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
518 tmp_token.num = repstr[i+1]-'0';
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
519 tokens.push_back (tmp_token);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
520 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
521 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
522 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
523
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
524 std::string rep;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
525 int num_tokens = tokens.size ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
526
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
527 if (num_tokens > 0)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
528 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
529 // Determine replacement length
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
530 const size_t replen = repstr.size () - 2*num_tokens;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
531 int delta = 0;
25337
3ff9192b676e use auto keyword to declare iterator variables where possible
John W. Eaton <jwe@octave.org>
parents: 25166
diff changeset
532 auto p = rx_lst.begin ();
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
533 for (size_t i = 0; i < num_matches; i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
534 {
22860
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
535 octave_quit ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
536
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
537 double start = p->start ();
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
538 double end = p->end ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
539
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
540 const Matrix pairs (p->token_extents ());
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
541 size_t pairlen = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
542 for (int j = 0; j < num_tokens; j++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
543 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
544 if (tokens[j].num == 0)
24605
3e17190dfaea Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents: 23083
diff changeset
545 pairlen += static_cast<size_t> (end - start + 1);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
546 else if (tokens[j].num <= pairs.rows ())
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
547 pairlen += static_cast<size_t> (pairs(tokens[j].num-1,1)
24605
3e17190dfaea Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents: 23083
diff changeset
548 - pairs(tokens[j].num-1,0)
3e17190dfaea Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents: 23083
diff changeset
549 + 1);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
550 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
551 delta += (static_cast<int> (replen + pairlen)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
552 - static_cast<int> (end - start + 1));
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
553 p++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
554 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
555
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
556 // Build replacement string
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
557 rep.reserve (buffer.size () + delta);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
558 size_t from = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
559 p = rx_lst.begin ();
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
560 for (size_t i = 0; i < num_matches; i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
561 {
22860
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
562 octave_quit ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
563
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
564 double start = p->start ();
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
565 double end = p->end ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
566
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
567 const Matrix pairs (p->token_extents ());
24605
3e17190dfaea Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents: 23083
diff changeset
568 rep.append (&buffer[from], static_cast<size_t> (start - 1 - from));
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
569 from = static_cast<size_t> (end);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
570
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
571 size_t cur_pos = 0;
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
572
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
573 for (int j = 0; j < num_tokens; j++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
574 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
575 rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
576 cur_pos = tokens[j].pos+2;
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
577
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
578 int k = tokens[j].num;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
579 if (k == 0)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
580 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
581 // replace with entire match
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
582 rep.append (&buffer[static_cast<size_t> (end - 1)],
24605
3e17190dfaea Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents: 23083
diff changeset
583 static_cast<size_t> (end - start + 1));
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
584 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
585 else if (k <= pairs.rows ())
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
586 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
587 // replace with group capture
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
588 rep.append (&buffer[static_cast<size_t> (pairs(k-1,0)-1)],
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
589 static_cast<size_t> (pairs(k-1,1)
24605
3e17190dfaea Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents: 23083
diff changeset
590 - pairs(k-1,0) + 1));
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
591 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
592 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
593 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
594 // replace with nothing
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
595 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
596 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
597 if (cur_pos < repstr.size ())
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
598 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
599
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
600 p++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
601 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
602 rep.append (&buffer[from], buffer.size () - from);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
603 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
604 else
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
605 {
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
606 // Determine repstr length
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
607 const size_t replen = repstr.size ();
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
608 int delta = 0;
25337
3ff9192b676e use auto keyword to declare iterator variables where possible
John W. Eaton <jwe@octave.org>
parents: 25166
diff changeset
609 auto p = rx_lst.begin ();
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
610 for (size_t i = 0; i < num_matches; i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
611 {
22860
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
612 octave_quit ();
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
613
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
614 delta += static_cast<int> (replen)
22402
4caa7b28d183 maint: Style check C++ code in liboctave/
Rik <rik@octave.org>
parents: 22333
diff changeset
615 - static_cast<int> (p->end () - p->start () + 1);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
616 p++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
617 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
618
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
619 // Build replacement string
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
620 rep.reserve (buffer.size () + delta);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
621 size_t from = 0;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
622 p = rx_lst.begin ();
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
623 for (size_t i = 0; i < num_matches; i++)
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
624 {
22860
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
625 octave_quit ();
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
626
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
627 rep.append (&buffer[from],
24605
3e17190dfaea Fix incorrect regexprep on ARM platforms (bug #52810).
Rik <rik@octave.org>
parents: 23083
diff changeset
628 static_cast<size_t> (p->start () - 1 - from));
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
629 from = static_cast<size_t> (p->end ());
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
630 rep.append (repstr);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
631 p++;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
632 }
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
633 rep.append (&buffer[from], buffer.size () - from);
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
634 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
635
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
636 retval = rep;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
637 return retval;
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
638 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
639 }