annotate liboctave/util/lo-regexp.cc @ 33634:4a70f390c85e default tip @

maint: Merge stable to default.
author Markus Mützel <markus.muetzel@gmx.de>
date Tue, 28 May 2024 15:25:54 +0200
parents 49128bdb9eb2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
27923
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
1 ////////////////////////////////////////////////////////////////////////
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
2 //
32632
2e484f9f1f18 maint: update Octave Project Developers copyright for the new year
John W. Eaton <jwe@octave.org>
parents: 32094
diff changeset
3 // Copyright (C) 2002-2024 The Octave Project Developers
27923
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
4 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
5 // See the file COPYRIGHT.md in the top-level directory of this
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
6 // distribution or <https://octave.org/copyright/>.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
7 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
8 // This file is part of Octave.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
9 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
10 // Octave is free software: you can redistribute it and/or modify it
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
11 // under the terms of the GNU General Public License as published by
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
12 // the Free Software Foundation, either version 3 of the License, or
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
13 // (at your option) any later version.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
14 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
15 // Octave is distributed in the hope that it will be useful, but
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
18 // GNU General Public License for more details.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
19 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
20 // You should have received a copy of the GNU General Public License
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
21 // along with Octave; see the file COPYING. If not, see
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
22 // <https://www.gnu.org/licenses/>.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
23 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
24 ////////////////////////////////////////////////////////////////////////
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
25
21724
aba2e6293dd8 use "#if ..." consistently instead of "#ifdef" and "#ifndef"
John W. Eaton <jwe@octave.org>
parents: 21301
diff changeset
26 #if defined (HAVE_CONFIG_H)
21301
40de9f8f23a6 Use '#include "config.h"' rather than <config.h>.
Rik <rik@octave.org>
parents: 21202
diff changeset
27 # include "config.h"
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
28 #endif
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
29
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
30 #include <list>
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
31 #include <sstream>
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
32 #include <string>
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
33 #include <vector>
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
34
32091
d313e8ece0b8 lo-regexp.cc: Only include PCRE2/PCRE headers if feature test succeeded (bug #64172).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
35 #if defined (HAVE_PCRE2)
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
36 # define PCRE2_CODE_UNIT_WIDTH 8
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
37 # if defined (HAVE_PCRE2_H)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
38 # include <pcre2.h>
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
39 # elif defined (HAVE_PCRE2_PCRE2_H)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
40 # include <pcre2/pcre2.h>
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
41 # endif
32091
d313e8ece0b8 lo-regexp.cc: Only include PCRE2/PCRE headers if feature test succeeded (bug #64172).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
42 #elif defined (HAVE_PCRE)
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
43 # if defined (HAVE_PCRE_H)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
44 # include <pcre.h>
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
45 # elif defined (HAVE_PCRE_PCRE_H)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
46 # include <pcre/pcre.h>
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
47 # endif
14025
9867be070ee1 use pcre/pcre.h if it is present
John W. Eaton <jwe@octave.org>
parents: 14024
diff changeset
48 #endif
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
49
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
50 #include "Matrix.h"
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
51 #include "lo-error.h"
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
52 #include "oct-locbuf.h"
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
53 #include "quit.h"
16940
99122191d3dd maint: Rename regexp.h to lo-regexp.h, regexp.cc to lo-regexp.cc in liboctave.
Rik <rik@octave.org>
parents: 15819
diff changeset
54 #include "lo-regexp.h"
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
55 #include "str-vec.h"
27523
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27379
diff changeset
56 #include "unistr-wrappers.h"
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
57 #include "unwind-prot.h"
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
58
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
59 #if defined (HAVE_PCRE2)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
60 typedef pcre2_code octave_pcre_code;
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
61 typedef PCRE2_SIZE OCTAVE_PCRE_SIZE;
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
62 void (*octave_pcre_code_free) (octave_pcre_code *) = pcre2_code_free;
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
63 # define OCTAVE_PCRE_CASELESS PCRE2_CASELESS
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
64 # define OCTAVE_PCRE_DOTALL PCRE2_DOTALL
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
65 # define OCTAVE_PCRE_MULTILINE PCRE2_MULTILINE
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
66 # define OCTAVE_PCRE_EXTENDED PCRE2_EXTENDED
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
67 # define OCTAVE_PCRE_UTF PCRE2_UTF
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
68 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE2_INFO_CAPTURECOUNT
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
69 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE2_INFO_NAMECOUNT
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
70 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE2_INFO_NAMEENTRYSIZE
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
71 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE2_INFO_NAMETABLE
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
72 #elif defined (HAVE_PCRE)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
73 typedef pcre octave_pcre_code;
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
74 typedef int OCTAVE_PCRE_SIZE;
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
75 void (*octave_pcre_code_free) (void *) = pcre_free;
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
76 # define OCTAVE_PCRE_CASELESS PCRE_CASELESS
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
77 # define OCTAVE_PCRE_DOTALL PCRE_DOTALL
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
78 # define OCTAVE_PCRE_MULTILINE PCRE_MULTILINE
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
79 # define OCTAVE_PCRE_EXTENDED PCRE_EXTENDED
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
80 # define OCTAVE_PCRE_UTF PCRE_UTF8
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
81 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE_INFO_CAPTURECOUNT
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
82 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE_INFO_NAMECOUNT
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
83 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE_INFO_NAMEENTRYSIZE
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
84 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE_INFO_NAMETABLE
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
85 #else
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
86 # error "PCRE2 or PCRE library is required to build Octave"
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
87 #endif
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
88
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
89 static inline int
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
90 octave_pcre_pattern_info (const octave_pcre_code *code, int what, void *where)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
91 {
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
92 #if defined (HAVE_PCRE2)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
93 return pcre2_pattern_info (code, what, where);
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
94 #else
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
95 return pcre_fullinfo (code, nullptr, what, where);
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
96 #endif
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
97 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
98
31605
e88a07dec498 maint: Use macros to begin/end C++ namespaces.
Rik <rik@octave.org>
parents: 31425
diff changeset
99 OCTAVE_BEGIN_NAMESPACE(octave)
e88a07dec498 maint: Use macros to begin/end C++ namespaces.
Rik <rik@octave.org>
parents: 31425
diff changeset
100
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
101 // Define the maximum number of retries for a pattern
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
102 // that possibly results in an infinite recursion.
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
103 #define PCRE_MATCHLIMIT_MAX 10
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
104
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
105 // FIXME: should this be configurable?
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
106 #define MAXLOOKBEHIND 10
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
107
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
108 static bool lookbehind_warned = false;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
109
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
110 // FIXME: don't bother collecting and composing return values
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
111 // the user doesn't want.
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
112
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
113 void
31771
21f9b34eb893 maint: Eliminate "(void)" in C++ function prototypes/declarations.
Rik <rik@octave.org>
parents: 31706
diff changeset
114 regexp::free ()
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
115 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
116 octave_pcre_code_free (static_cast<octave_pcre_code *> (m_code));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
117 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
118
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
119 void
31771
21f9b34eb893 maint: Eliminate "(void)" in C++ function prototypes/declarations.
Rik <rik@octave.org>
parents: 31706
diff changeset
120 regexp::compile_internal ()
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
121 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
122 // If we had a previously compiled pattern, release it.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
123 free ();
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
124
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
125 std::size_t max_length = MAXLOOKBEHIND;
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
126
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
127 std::size_t pos = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
128 std::size_t new_pos;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
129 int inames = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
130 std::ostringstream buf;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
131
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
132 while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
133 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
134 std::size_t tmp_pos;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
135 if (m_pattern.size () > new_pos + 2
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
136 && m_pattern.at (new_pos + 2) == '<'
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
137 && ! (m_pattern.size () > new_pos + 3
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
138 && (m_pattern.at (new_pos + 3) == '='
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
139 || m_pattern.at (new_pos + 3) == '!'))
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
140 && (tmp_pos = m_pattern.find_first_of ('>', new_pos))
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
141 != std::string::npos
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
142 && m_pattern.find_first_of (')', tmp_pos) != std::string::npos)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
143 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
144 // The syntax of named tokens in pcre is "(?P<name>...)" while
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
145 // we need a syntax "(?<name>...)", so fix that here. Also an
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
146 // expression like
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
147 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)"
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
148 // should be perfectly legal, while pcre does not allow the same
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
149 // named token name on both sides of the alternative. Also fix
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
150 // that here by replacing name tokens by dummy names, and dealing
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
151 // with the dummy names later.
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
152
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
153 std::string tmp_name
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
154 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
155
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
156 bool found = false;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
157
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
158 for (int i = 0; i < m_names; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
159 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
160 if (m_named_pats(i) == tmp_name)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
161 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
162 m_named_idx.resize (dim_vector (inames+1, 1));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
163 m_named_idx(inames) = i;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
164 found = true;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
165 break;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
166 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
167 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
168
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
169 if (! found)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
170 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
171 m_named_idx.resize (dim_vector (inames+1, 1));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
172 m_named_idx(inames) = m_names;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
173 m_named_pats.append (tmp_name);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
174 m_names++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
175 }
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
176
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
177 if (new_pos - pos > 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
178 buf << m_pattern.substr (pos, new_pos-pos);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
179 if (inames < 10)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
180 buf << "(?P<n00" << inames++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
181 else if (inames < 100)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
182 buf << "(?P<n0" << inames++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
183 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
184 buf << "(?P<n" << inames++;
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
185
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
186 pos = tmp_pos;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
187 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
188 else if (m_pattern.size () > new_pos + 2
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
189 && m_pattern.at (new_pos + 2) == '<')
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
190 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
191 // Find lookbehind operators of arbitrary length (ie like
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
192 // "(?<=[a-z]*)") and replace with a maximum length operator
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
193 // as PCRE can not yet handle arbitrary length lookbehind
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
194 // operators. Use the string length as the maximum length to
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
195 // avoid issues.
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
196
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
197 int brackets = 1;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
198 std::size_t tmp_pos1 = new_pos + 2;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
199 std::size_t tmp_pos2 = tmp_pos1;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
200
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
201 while (tmp_pos1 < m_pattern.length () && brackets > 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
202 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
203 char ch = m_pattern.at (tmp_pos1);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
204
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
205 if (ch == '(')
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
206 brackets++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
207 else if (ch == ')')
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
208 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
209 if (brackets > 1)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
210 tmp_pos2 = tmp_pos1;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
211
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
212 brackets--;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
213 }
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
214
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
215 tmp_pos1++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
216 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
217
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
218 if (brackets != 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
219 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
220 buf << m_pattern.substr (pos, new_pos - pos) << "(?";
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
221 pos = new_pos + 2;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
222 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
223 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
224 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
225 std::size_t tmp_pos3 = m_pattern.find_first_of ("*+", tmp_pos2);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
226
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
227 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
228 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
229 if (! lookbehind_warned)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
230 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
231 lookbehind_warned = true;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
232 (*current_liboctave_warning_with_id_handler)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
233 ("Octave:regexp-lookbehind-limit",
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
234 "%s: arbitrary length lookbehind patterns are only supported up to length %d",
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
235 m_who.c_str (), MAXLOOKBEHIND);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
236 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
237
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
238 buf << m_pattern.substr (pos, new_pos - pos) << '(';
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
239
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
240 std::size_t i;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
241
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
242 if (m_pattern.at (tmp_pos3) == '*')
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
243 i = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
244 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
245 i = 1;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
246
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
247 for (; i < max_length + 1; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
248 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
249 buf << m_pattern.substr (new_pos, tmp_pos3 - new_pos)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
250 << '{' << i << '}';
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
251 buf << m_pattern.substr (tmp_pos3 + 1,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
252 tmp_pos1 - tmp_pos3 - 1);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
253 if (i != max_length)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
254 buf << '|';
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
255 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
256 buf << ')';
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
257 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
258 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
259 buf << m_pattern.substr (pos, tmp_pos1 - pos);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
260
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
261 pos = tmp_pos1;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
262 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
263 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
264 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
265 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
266 buf << m_pattern.substr (pos, new_pos - pos) << "(?";
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
267 pos = new_pos + 2;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
268 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
269
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
270 }
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
271
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
272 buf << m_pattern.substr (pos);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
273
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
274 // Replace NULLs with escape sequence because conversion function c_str()
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
275 // will terminate string early at embedded NULLs.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
276 std::string buf_str = buf.str ();
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
277 while ((pos = buf_str.find ('\0')) != std::string::npos)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
278 buf_str.replace (pos, 1, "\\000");
24741
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
279
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
280 int pcre_options
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
281 = ( (m_options.case_insensitive () ? OCTAVE_PCRE_CASELESS : 0)
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
282 | (m_options.dotexceptnewline () ? 0 : OCTAVE_PCRE_DOTALL)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
283 | (m_options.lineanchors () ? OCTAVE_PCRE_MULTILINE : 0)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
284 | (m_options.freespacing () ? OCTAVE_PCRE_EXTENDED : 0)
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
285 | OCTAVE_PCRE_UTF);
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
286
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
287 #if defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
288 PCRE2_SIZE erroffset;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
289 int errnumber;
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
290
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
291 m_code = pcre2_compile (reinterpret_cast<PCRE2_SPTR> (buf_str.c_str ()),
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
292 PCRE2_ZERO_TERMINATED, pcre_options,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
293 &errnumber, &erroffset, nullptr);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
294
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
295 if (! m_code)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
296 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
297 // PCRE docs say:
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
298 //
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
299 // If the buffer is too small, the message is truncated (but
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
300 // still with a trailing zero), and the negative error code
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
301 // PCRE2_ERROR_NOMEMORY is returned. None of the messages are
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
302 // very long; a buffer size of 120 code units is ample.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
303 //
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
304 // so we assume that 256 will be large enough to avoid truncated
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
305 // messages.
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
306
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
307 PCRE2_UCHAR err [256];
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
308 pcre2_get_error_message (errnumber, err, sizeof (err));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
309 (*current_liboctave_error_handler)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
310 ("%s: %s at position %zu of expression", m_who.c_str (), err,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
311 erroffset);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
312 }
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
313 #else
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
314 const char *err;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
315 int erroffset;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
316
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
317 m_code = pcre_compile (buf_str.c_str (), pcre_options,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
318 &err, &erroffset, nullptr);
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
319
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
320 if (! m_code)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
321 (*current_liboctave_error_handler)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
322 ("%s: %s at position %d of expression", m_who.c_str (), err, erroffset);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
323 #endif
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
324 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
325
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
326 regexp::match_data
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
327 regexp::match (const std::string& buffer) const
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
328 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
329 // check if input is valid utf-8
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
330 const uint8_t *buf_str = reinterpret_cast<const uint8_t *> (buffer.c_str ());
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
331 if (octave_u8_check_wrapper (buf_str, buffer.length ()))
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
332 (*current_liboctave_error_handler)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
333 ("%s: the input string is invalid UTF-8", m_who.c_str ());
27523
19ad9150dd69 Check if input to regexp is valid UTF-8 is faster than PCRE's checks (bug #57064).
Markus Mützel <markus.muetzel@gmx.de>
parents: 27379
diff changeset
334
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
335 regexp::match_data retval;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
336
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
337 std::list<regexp::match_element> lst;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
338
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
339 int subpatterns;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
340 int namecount;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
341 int nameentrysize;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
342 char *nametable;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
343 std::size_t idx = 0;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
344
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
345 octave_pcre_code *re = static_cast<octave_pcre_code *> (m_code);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
346
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
347 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_CAPTURECOUNT, &subpatterns);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
348 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMECOUNT, &namecount);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
349 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
350 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMETABLE, &nametable);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
351
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
352 #if defined (HAVE_PCRE)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
353 OCTAVE_LOCAL_BUFFER (OCTAVE_PCRE_SIZE, ovector, (subpatterns+1)*3);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
354 #endif
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
355
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
356 OCTAVE_LOCAL_BUFFER (int, nidx, namecount);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
357
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
358 for (int i = 0; i < namecount; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
359 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
360 // Index of subpattern in first two bytes of name (MSB first).
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
361 // Extract index.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
362 nidx[i] = (static_cast<int> (nametable[i*nameentrysize])) << 8
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
363 | static_cast<int> (nametable[i*nameentrysize+1]);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
364 }
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
365
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
366 while (true)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
367 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
368 octave_quit ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
369
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
370 #if defined (HAVE_PCRE2)
33034
49128bdb9eb2 use explicit lambda-expression captures (bug #65318)
John W. Eaton <jwe@octave.org>
parents: 32936
diff changeset
371 pcre2_match_data *tmp_match_data
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
372 = pcre2_match_data_create_from_pattern (re, nullptr);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
373
33034
49128bdb9eb2 use explicit lambda-expression captures (bug #65318)
John W. Eaton <jwe@octave.org>
parents: 32936
diff changeset
374 unwind_action cleanup_match_data ([tmp_match_data] () { pcre2_match_data_free (tmp_match_data); });
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
375
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
376 int matches = pcre2_match (re, reinterpret_cast<PCRE2_SPTR> (buffer.c_str ()),
22333
2758af148ced move base_list and regexp classes to octave namespace
John W. Eaton <jwe@octave.org>
parents: 22323
diff changeset
377 buffer.length (), idx,
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
378 PCRE2_NO_UTF_CHECK | (idx ? PCRE2_NOTBOL : 0),
33034
49128bdb9eb2 use explicit lambda-expression captures (bug #65318)
John W. Eaton <jwe@octave.org>
parents: 32936
diff changeset
379 tmp_match_data, nullptr);
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
380
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
381 if (matches < 0 && matches != PCRE2_ERROR_NOMATCH)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
382 (*current_liboctave_error_handler)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
383 ("%s: internal error calling pcre2_match; "
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
384 "error code from pcre2_match is %i", m_who.c_str (), matches);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
385
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
386 if (matches == PCRE2_ERROR_NOMATCH)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
387 break;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
388
33034
49128bdb9eb2 use explicit lambda-expression captures (bug #65318)
John W. Eaton <jwe@octave.org>
parents: 32936
diff changeset
389 OCTAVE_PCRE_SIZE *ovector = pcre2_get_ovector_pointer (tmp_match_data);
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
390 #else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
391 int matches = pcre_exec (re, nullptr, buffer.c_str (),
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
392 buffer.length (), idx,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
393 PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
394 ovector, (subpatterns+1)*3);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
395
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
396 if (matches == PCRE_ERROR_MATCHLIMIT)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
397 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
398 // Try harder; start with default value for MATCH_LIMIT
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
399 // and increase it.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
400 (*current_liboctave_warning_with_id_handler)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
401 ("Octave:regexp-match-limit",
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
402 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
403
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
404 pcre_extra pe;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
405
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
406 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
407 static_cast<void *> (&pe.match_limit));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
408
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
409 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
410
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
411 int i = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
412 while (matches == PCRE_ERROR_MATCHLIMIT
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
413 && i++ < PCRE_MATCHLIMIT_MAX)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
414 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
415 octave_quit ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
416
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
417 pe.match_limit *= 10;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
418 matches = pcre_exec (re, &pe, buffer.c_str (),
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
419 buffer.length (), idx,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
420 PCRE_NO_UTF8_CHECK
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
421 | (idx ? PCRE_NOTBOL : 0),
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
422 ovector, (subpatterns+1)*3);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
423 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
424 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
425
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
426 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
427 (*current_liboctave_error_handler)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
428 ("%s: internal error calling pcre_exec; "
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
429 "error code from pcre_exec is %i", m_who.c_str (), matches);
21136
7cac4e7458f2 maint: clean up code around calls to current_liboctave_error_handler.
Rik <rik@octave.org>
parents: 20955
diff changeset
430
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
431 if (matches == PCRE_ERROR_NOMATCH)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
432 break;
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
433 #endif
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
434 if (ovector[0] >= ovector[1] && ! m_options.emptymatch ())
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
435 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
436 // Zero length match. Skip to next char.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
437 idx = ovector[0] + 1;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
438 if (idx < buffer.length ())
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
439 continue;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
440 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
441 break;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
442 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
443 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
444 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
445 int pos_match = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
446 Matrix token_extents (matches-1, 2);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
447
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
448 for (int i = 1; i < matches; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
449 {
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
450 #if defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
451 if (ovector[2*i] != PCRE2_SIZE_MAX
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
452 #else
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
453 if (ovector[2*i] >= 0
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
454 #endif
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
455 && ovector[2*i+1] > 0
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
456 && (i == 1 || ovector[2*i] != ovector[2*i-2]
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
457 || ovector[2*i-1] != ovector[2*i+1]))
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
458 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
459 token_extents(pos_match, 0) = double (ovector[2*i]+1);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
460 token_extents(pos_match++, 1) = double (ovector[2*i+1]);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
461 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
462 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
463
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
464 token_extents.resize (pos_match, 2);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
465
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
466 OCTAVE_PCRE_SIZE start = ovector[0] + 1;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
467 OCTAVE_PCRE_SIZE end = ovector[1];
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
468
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
469 #if defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
470 // Must use explicit length constructor as match can contain '\0'.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
471 std::string match_string = std::string (buffer.c_str() + start - 1,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
472 end - start + 1);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
473 #else
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
474 const char **listptr;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
475 int status = pcre_get_substring_list (buffer.c_str (), ovector,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
476 matches, &listptr);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
477
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
478 if (status == PCRE_ERROR_NOMEMORY)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
479 (*current_liboctave_error_handler)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
480 ("%s: cannot allocate memory in pcre_get_substring_list",
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
481 m_who.c_str ());
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
482
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
483 // Must use explicit length constructor as match can contain '\0'.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
484 std::string match_string = std::string (*listptr, end - start + 1);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
485 #endif
24741
00dfa167c1fe Fix handling of NULL character in regular expressions for Matlab compatibility.
Rik <rik@octave.org>
parents: 24607
diff changeset
486
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
487 string_vector tokens (pos_match);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
488 string_vector named_tokens (m_names);
32094
f3d12359f0e4 lo-regexp.cc: Avoid unused variable warning with PCRE2.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32092
diff changeset
489 #if ! defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
490 int pos_offset = 0;
32094
f3d12359f0e4 lo-regexp.cc: Avoid unused variable warning with PCRE2.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32092
diff changeset
491 #endif
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
492 pos_match = 0;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
493
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
494 for (int i = 1; i < matches; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
495 {
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
496 #if defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
497 if (ovector[2*i] != PCRE2_SIZE_MAX
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
498 #else
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
499 if (ovector[2*i] >= 0
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
500 #endif
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
501 && ovector[2*i+1] > 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
502 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
503 if (i == 1 || ovector[2*i] != ovector[2*i-2]
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
504 || ovector[2*i-1] != ovector[2*i+1])
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
505 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
506 if (namecount > 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
507 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
508 // FIXME: Should probably do this with a map()
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
509 // rather than a linear search. However,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
510 // the number of captured, named expressions
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
511 // is usually pretty small (< 4)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
512 for (int j = 0; j < namecount; j++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
513 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
514 if (nidx[j] == i)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
515 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
516 std::size_t len = ovector[2*i+1] - ovector[2*i];
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
517 named_tokens(m_named_idx(j))
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
518 #if defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
519 = std::string (buffer.c_str () + ovector[2*i], len);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
520 #else
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
521 = std::string (*(listptr+i-pos_offset), len);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
522 #endif
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
523 break;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
524 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
525 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
526 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
527
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
528 std::size_t len = ovector[2*i+1] - ovector[2*i];
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
529 #if defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
530 tokens(pos_match++) = std::string (buffer.c_str() + ovector[2*i], len);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
531 #else
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
532 tokens(pos_match++) = std::string (*(listptr+i), len);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
533 #endif
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
534 }
32094
f3d12359f0e4 lo-regexp.cc: Avoid unused variable warning with PCRE2.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32092
diff changeset
535 #if ! defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
536 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
537 pos_offset++;
32094
f3d12359f0e4 lo-regexp.cc: Avoid unused variable warning with PCRE2.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32092
diff changeset
538 #endif
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
539 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
540 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
541
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
542 #if ! defined (HAVE_PCRE2)
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
543 pcre_free_substring_list (listptr);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
544 #endif
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
545
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
546 // FIXME: MATCH_ELEMENT uses double values for these,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
547 // presumably because that is what the Octave interpreter
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
548 // uses. Should we check that the values don't exceed
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
549 // flintmax here? It seems unlikely that it would happen,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
550 // but...
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
551
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
552 double dstart = static_cast<double> (start);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
553 double dend = static_cast<double> (end);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
554
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
555 regexp::match_element new_elem (named_tokens, tokens, match_string,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
556 token_extents,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
557 dstart, dend);
31424
d1165473e4b0 allow Octave to use PCRE2 (bug #61542)
Rafael Laboissiere <rafael@laboissiere.net>
parents: 31423
diff changeset
558
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
559 lst.push_back (new_elem);
14536
6d5c951ec520 Add 'emptymatch', 'noemptymatch' options to regular expressions.
Rik <octave@nomad.inbox5.com>
parents: 14506
diff changeset
560
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
561 if (ovector[1] <= ovector[0])
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
562 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
563 // Zero length match. Skip to next char.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
564 idx = ovector[0] + 1;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
565 if (idx <= buffer.length ())
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
566 continue;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
567 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
568 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
569 idx = ovector[1];
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
570
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
571 if (m_options.once () || idx >= buffer.length ())
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
572 break;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
573 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
574 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
575
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
576 retval = regexp::match_data (lst, m_named_pats);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
577
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
578 return retval;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
579 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
580
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
581 bool
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
582 regexp::is_match (const std::string& buffer) const
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
583 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
584 regexp::match_data rx_lst = match (buffer);
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
585
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
586 return rx_lst.size () > 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
587 }
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
588
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
589 Array<bool>
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
590 regexp::is_match (const string_vector& buffer) const
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
591 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
592 octave_idx_type len = buffer.numel ();
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
593
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
594 Array<bool> retval (dim_vector (len, 1));
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
595
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
596 for (octave_idx_type i = 0; i < buffer.numel (); i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
597 retval(i) = is_match (buffer(i));
7779
791231dac333 Add regexp matching to Fwho and Fclear
David Bateman <dbateman@free.fr>
parents:
diff changeset
598
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
599 return retval;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
600 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
601
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
// Declare rep_token_t used in processing replacement string.
//
// Both members carry default initializers so that a default-constructed
// rep_token_t never holds indeterminate values.  The type is still an
// aggregate (C++14 and later), so member-wise assignment and brace
// initialization keep working as before.
struct rep_token_t
{
  // NOTE(review): presumably the offset of the token within the
  // replacement string -- confirm against regexp::replace.
  std::size_t pos = 0;

  // NOTE(review): presumably the capture group number the token refers
  // to ($0-$9) -- confirm against regexp::replace.
  int num = 0;
};
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
608
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
609 std::string
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
610 regexp::replace (const std::string& buffer,
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
611 const std::string& replacement) const
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
612 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
613 std::string retval;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
614
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
615 const regexp::match_data rx_lst = match (buffer);
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
616
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
617 std::size_t num_matches = rx_lst.size ();
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
618
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
619 if (num_matches == 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
620 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
621 retval = buffer;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
622 return retval;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
623 }
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
624
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
625 // Identify replacement tokens; build a vector of group numbers in
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
626 // the replacement string so that we can quickly calculate the size
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
627 // of the replacement.
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
628
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
629 // FIXME: All code assumes that only 10 tokens ($0-$9) exist.
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
630 // $11 represents $1 followed by the character '1' rather than
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
631 // the eleventh capture buffer.
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
632
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
633 std::string repstr = replacement;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
634 std::vector<rep_token_t> tokens;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
635 tokens.reserve (5); // Reserve memory for 5 pattern replacements
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
636
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
637 for (std::size_t i=0; i < repstr.size (); i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
638 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
639 if (repstr[i] == '\\')
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
640 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
641 if (i < repstr.size () - 1 && repstr[i+1] == '$')
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
642 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
643 repstr.erase (i, 1); // erase backslash
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
644 i++; // skip over '$'
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
645 continue;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
646 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
647 if (i < repstr.size () - 1 && repstr[i+1] == '\\')
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
648 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
649 repstr.erase (i, 1); // erase 1st backslash
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
650 continue;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
651 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
652 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
653 else if (repstr[i] == '$')
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
654 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
655 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
656 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
657 rep_token_t tmp_token;
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
658
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
659 tmp_token.pos = i;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
660 tmp_token.num = repstr[i+1]-'0';
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
661 tokens.push_back (tmp_token);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
662 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
663 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
664 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
665
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
666 std::string rep;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
667 int num_tokens = tokens.size ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
668
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
669 if (num_tokens > 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
670 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
671 // Determine replacement length
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
672 const std::size_t replen = repstr.size () - 2*num_tokens;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
673 int delta = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
674 auto p = rx_lst.begin ();
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
675 for (std::size_t i = 0; i < num_matches; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
676 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
677 octave_quit ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
678
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
679 double start = p->start ();
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
680 double end = p->end ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
681
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
682 const Matrix pairs (p->token_extents ());
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
683 std::size_t pairlen = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
684 for (int j = 0; j < num_tokens; j++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
685 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
686 if (tokens[j].num == 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
687 pairlen += static_cast<std::size_t> (end - start + 1);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
688 else if (tokens[j].num <= pairs.rows ())
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
689 pairlen += static_cast<std::size_t> (pairs(tokens[j].num-1, 1)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
690 - pairs(tokens[j].num-1, 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
691 + 1);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
692 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
693 delta += (static_cast<int> (replen + pairlen)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
694 - static_cast<int> (end - start + 1));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
695 p++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
696 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
697
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
698 // Build replacement string
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
699 rep.reserve (buffer.size () + delta);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
700 std::size_t from = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
701 p = rx_lst.begin ();
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
702 for (std::size_t i = 0; i < num_matches; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
703 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
704 octave_quit ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
705
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
706 double start = p->start ();
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
707 double end = p->end ();
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
708
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
709 const Matrix pairs (p->token_extents ());
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
710 rep.append (&buffer[from], static_cast<std::size_t> (start - 1 - from));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
711 from = static_cast<std::size_t> (end);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
712
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
713 std::size_t cur_pos = 0;
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
714
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
715 for (int j = 0; j < num_tokens; j++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
716 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
717 rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
718 cur_pos = tokens[j].pos+2;
15541
9db32cabeacf Fix backslash handling in regexp pattern (Bug #37092)
Rik <rik@octave.org>
parents: 15271
diff changeset
719
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
720 int k = tokens[j].num;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
721 if (k == 0)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
722 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
723 // replace with entire match
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
724 rep.append (&buffer[static_cast<std::size_t> (end - 1)],
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
725 static_cast<std::size_t> (end - start + 1));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
726 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
727 else if (k <= pairs.rows ())
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
728 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
729 // replace with group capture
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
730 rep.append (&buffer[static_cast<std::size_t> (pairs(k-1, 0)-1)],
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
731 static_cast<std::size_t> (pairs(k-1, 1)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
732 - pairs(k-1, 0) + 1));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
733 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
734 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
735 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
736 // replace with nothing
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
737 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
738 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
739 if (cur_pos < repstr.size ())
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
740 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
741
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
742 p++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
743 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
744 rep.append (&buffer[from], buffer.size () - from);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
745 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
746 else
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
747 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
748 // Determine repstr length
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
749 const std::size_t replen = repstr.size ();
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
750 int delta = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
751 auto p = rx_lst.begin ();
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
752 for (std::size_t i = 0; i < num_matches; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
753 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
754 octave_quit ();
22860
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
755
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
756 delta += static_cast<int> (replen)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
757 - static_cast<int> (p->end () - p->start () + 1);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
758 p++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
759 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
760
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
761 // Build replacement string
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
762 rep.reserve (buffer.size () + delta);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
763 std::size_t from = 0;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
764 p = rx_lst.begin ();
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
765 for (std::size_t i = 0; i < num_matches; i++)
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
766 {
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
767 octave_quit ();
22860
0b1e25cc4457 eliminate use of OCTAVE_QUIT macro in C++ sources
John W. Eaton <jwe@octave.org>
parents: 22755
diff changeset
768
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
769 rep.append (&buffer[from],
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
770 static_cast<std::size_t> (p->start () - 1 - from));
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
771 from = static_cast<std::size_t> (p->end ());
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
772 rep.append (repstr);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
773 p++;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
774 }
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
775 rep.append (&buffer[from], buffer.size () - from);
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
776 }
14024
fc9f204faea0 refactor regexp (bug #34440)
John W. Eaton <jwe@octave.org>
parents: 11586
diff changeset
777
31607
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
778 retval = rep;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
779 return retval;
aac27ad79be6 maint: Re-indent code after switch to using namespace macros.
Rik <rik@octave.org>
parents: 31605
diff changeset
780 }
31605
e88a07dec498 maint: Use macros to begin/end C++ namespaces.
Rik <rik@octave.org>
parents: 31425
diff changeset
781
e88a07dec498 maint: Use macros to begin/end C++ namespaces.
Rik <rik@octave.org>
parents: 31425
diff changeset
782 OCTAVE_END_NAMESPACE(octave)