Mercurial > gnulib
annotate lib/regcomp.c @ 40245:eeb8fe2e91ad
autoupdate
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Sun, 17 Mar 2019 16:29:28 -0700 |
parents | 1a63e9768c53 |
children |
rev | line source |
---|---|
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1 /* Extended regular expression matching and search library. |
40058 | 2 Copyright (C) 2002-2019 Free Software Foundation, Inc. |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3 This file is part of the GNU C Library. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
5 |
17233
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
6 The GNU C Library is free software; you can redistribute it and/or |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
7 modify it under the terms of the GNU Lesser General Public |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
8 License as published by the Free Software Foundation; either |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
9 version 2.1 of the License, or (at your option) any later version. |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
10 |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
11 The GNU C Library is distributed in the hope that it will be useful, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
17233
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
14 Lesser General Public License for more details. |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
15 |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
16 You should have received a copy of the GNU Lesser General Public |
b6b08f30c630
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16950
diff
changeset
|
17 License along with the GNU C Library; if not, see |
19190 | 18 <https://www.gnu.org/licenses/>. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
19 |
18093
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
20 #ifdef _LIBC |
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
21 # include <locale/weight.h> |
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
22 #endif |
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
23 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
24 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
25 size_t length, reg_syntax_t syntax); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
26 static void re_compile_fastmap_iter (regex_t *bufp, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
27 const re_dfastate_t *init_state, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
28 char *fastmap); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
29 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
30 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
31 static void free_charset (re_charset_t *cset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
32 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
33 static void free_workarea_compile (regex_t *preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
34 static reg_errcode_t create_initial_state (re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
35 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
36 static void optimize_utf8 (re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
37 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
38 static reg_errcode_t analyze (regex_t *preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
39 static reg_errcode_t preorder (bin_tree_t *root, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
40 reg_errcode_t (fn (void *, bin_tree_t *)), |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
41 void *extra); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
42 static reg_errcode_t postorder (bin_tree_t *root, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
43 reg_errcode_t (fn (void *, bin_tree_t *)), |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
44 void *extra); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
45 static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
46 static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
47 static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
48 bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
49 static reg_errcode_t calc_first (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
50 static reg_errcode_t calc_next (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
51 static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
52 static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint); |
6185
6b09f7f6ba73
* lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents:
6184
diff
changeset
|
53 static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
54 unsigned int constraint); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
55 static reg_errcode_t calc_eclosure (re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
56 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
57 Idx node, bool root); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
58 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
59 static Idx fetch_number (re_string_t *input, re_token_t *token, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
60 reg_syntax_t syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
61 static int peek_token (re_token_t *token, re_string_t *input, |
19445 | 62 reg_syntax_t syntax); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
63 static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
64 reg_syntax_t syntax, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
65 static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
66 re_token_t *token, reg_syntax_t syntax, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
67 Idx nest, reg_errcode_t *err); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
68 static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
69 re_token_t *token, reg_syntax_t syntax, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
70 Idx nest, reg_errcode_t *err); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
71 static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
72 re_token_t *token, reg_syntax_t syntax, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
73 Idx nest, reg_errcode_t *err); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
74 static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
75 re_token_t *token, reg_syntax_t syntax, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
76 Idx nest, reg_errcode_t *err); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
77 static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
78 re_dfa_t *dfa, re_token_t *token, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
79 reg_syntax_t syntax, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
80 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
81 re_token_t *token, reg_syntax_t syntax, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
82 reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
83 static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
84 re_string_t *regexp, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
85 re_token_t *token, int token_len, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
86 re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
87 reg_syntax_t syntax, |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
88 bool accept_hyphen); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
89 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
90 re_string_t *regexp, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
91 re_token_t *token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
92 #ifdef RE_ENABLE_I18N |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
93 static reg_errcode_t build_equiv_class (bitset_t sbcset, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
94 re_charset_t *mbcset, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
95 Idx *equiv_class_alloc, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
96 const unsigned char *name); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
97 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
98 bitset_t sbcset, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
99 re_charset_t *mbcset, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
100 Idx *char_class_alloc, |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
101 const char *class_name, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
102 reg_syntax_t syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
103 #else /* not RE_ENABLE_I18N */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
104 static reg_errcode_t build_equiv_class (bitset_t sbcset, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
105 const unsigned char *name); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
106 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
107 bitset_t sbcset, |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
108 const char *class_name, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
109 reg_syntax_t syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
110 #endif /* not RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
111 static bin_tree_t *build_charclass_op (re_dfa_t *dfa, |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
112 RE_TRANSLATE_TYPE trans, |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
113 const char *class_name, |
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
114 const char *extra, |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
115 bool non_match, reg_errcode_t *err); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
116 static bin_tree_t *create_tree (re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
117 bin_tree_t *left, bin_tree_t *right, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
118 re_token_type_t type); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
119 static bin_tree_t *create_token_tree (re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
120 bin_tree_t *left, bin_tree_t *right, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
121 const re_token_t *token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
122 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
123 static void free_token (re_token_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
124 static reg_errcode_t free_tree (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
125 static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
126 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
127 /* This table gives an error message for each of the error codes listed |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
128 in regex.h. Obviously the order here has to be the same as there. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
129 POSIX doesn't require that we do anything for REG_NOERROR, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
130 but why not be nice? */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
131 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
132 static const char __re_error_msgid[] = |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
133 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
134 #define REG_NOERROR_IDX 0 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
135 gettext_noop ("Success") /* REG_NOERROR */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
136 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
137 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
138 gettext_noop ("No match") /* REG_NOMATCH */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
139 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
140 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
141 gettext_noop ("Invalid regular expression") /* REG_BADPAT */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
142 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
143 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
144 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
145 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
146 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
147 gettext_noop ("Invalid character class name") /* REG_ECTYPE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
148 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
149 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
150 gettext_noop ("Trailing backslash") /* REG_EESCAPE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
151 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
152 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
153 gettext_noop ("Invalid back reference") /* REG_ESUBREG */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
154 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
155 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") |
18218 | 156 gettext_noop ("Unmatched [, [^, [:, [., or [=") /* REG_EBRACK */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
157 "\0" |
18218 | 158 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=") |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
159 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
160 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
161 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
162 gettext_noop ("Unmatched \\{") /* REG_EBRACE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
163 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
164 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
165 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
166 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
167 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
168 gettext_noop ("Invalid range end") /* REG_ERANGE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
169 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
170 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
171 gettext_noop ("Memory exhausted") /* REG_ESPACE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
172 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
173 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
174 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
175 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
176 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
177 gettext_noop ("Premature end of regular expression") /* REG_EEND */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
178 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
179 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
180 gettext_noop ("Regular expression too big") /* REG_ESIZE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
181 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
182 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
183 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
184 }; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
185 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
186 static const size_t __re_error_msgid_idx[] = |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
187 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
188 REG_NOERROR_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
189 REG_NOMATCH_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
190 REG_BADPAT_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
191 REG_ECOLLATE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
192 REG_ECTYPE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
193 REG_EESCAPE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
194 REG_ESUBREG_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
195 REG_EBRACK_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
196 REG_EPAREN_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
197 REG_EBRACE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
198 REG_BADBR_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
199 REG_ERANGE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
200 REG_ESPACE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
201 REG_BADRPT_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
202 REG_EEND_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
203 REG_ESIZE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
204 REG_ERPAREN_IDX |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
205 }; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
206 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
207 /* Entry points for GNU code. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
208 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
209 /* re_compile_pattern is the GNU regular expression compiler: it |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
210 compiles PATTERN (of length LENGTH) and puts the result in BUFP. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
211 Returns 0 if the pattern was valid, otherwise an error string. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
212 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
213 Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
214 are set in BUFP on entry. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
215 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
216 const char * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
217 re_compile_pattern (const char *pattern, size_t length, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
218 struct re_pattern_buffer *bufp) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
219 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
220 reg_errcode_t ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
221 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
222 /* And GNU code determines whether or not to get register information |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
223 by passing null for the REGS argument to re_match, etc., not by |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
224 setting no_sub, unless RE_NO_SUB is set. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
225 bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
226 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
227 /* Match anchors at newline. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
228 bufp->newline_anchor = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
229 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
230 ret = re_compile_internal (bufp, pattern, length, re_syntax_options); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
231 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
232 if (!ret) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
233 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
234 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
235 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
236 weak_alias (__re_compile_pattern, re_compile_pattern) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
237 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
238 /* Set by 're_set_syntax' to the current regexp syntax to recognize. Can |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
239 also be assigned to arbitrarily: each pattern buffer stores its own |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
240 syntax, so it can be changed between regex compilations. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
241 /* This has no initializer because initialized variables in Emacs |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
242 become read-only after dumping. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
243 reg_syntax_t re_syntax_options; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
244 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
245 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
246 /* Specify the precise syntax of regexps for compilation. This provides |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
247 for compatibility for various utilities which historically have |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
248 different, incompatible syntaxes. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
249 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
250 The argument SYNTAX is a bit mask comprised of the various bits |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
251 defined in regex.h. We return the old syntax. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
252 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
253 reg_syntax_t |
18252
8162c20f4bc7
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18228
diff
changeset
|
254 re_set_syntax (reg_syntax_t syntax) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
255 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
256 reg_syntax_t ret = re_syntax_options; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
257 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
258 re_syntax_options = syntax; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
259 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
260 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
261 weak_alias (__re_set_syntax, re_set_syntax) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
262 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
263 int |
18252
8162c20f4bc7
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18228
diff
changeset
|
264 re_compile_fastmap (struct re_pattern_buffer *bufp) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
265 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
266 re_dfa_t *dfa = bufp->buffer; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
267 char *fastmap = bufp->fastmap; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
268 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
269 memset (fastmap, '\0', sizeof (char) * SBC_MAX); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
270 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
271 if (dfa->init_state != dfa->init_state_word) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
272 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
273 if (dfa->init_state != dfa->init_state_nl) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
274 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
275 if (dfa->init_state != dfa->init_state_begbuf) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
276 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
277 bufp->fastmap_accurate = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
278 return 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
279 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
280 weak_alias (__re_compile_fastmap, re_compile_fastmap) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
281 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
282 static inline void |
17346
cd38818bce4e
regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents:
17338
diff
changeset
|
283 __attribute__ ((always_inline)) |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
284 re_set_fastmap (char *fastmap, bool icase, int ch) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
285 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
286 fastmap[ch] = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
287 if (icase) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
288 fastmap[tolower (ch)] = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
289 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
290 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
291 /* Helper function for re_compile_fastmap. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
292 Compile fastmap for the initial_state INIT_STATE. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
293 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
294 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
295 re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
296 char *fastmap) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
297 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
298 re_dfa_t *dfa = bufp->buffer; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
299 Idx node_cnt; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
300 bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
301 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
302 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
303 Idx node = init_state->nodes.elems[node_cnt]; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
304 re_token_type_t type = dfa->nodes[node].type; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
305 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
306 if (type == CHARACTER) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
307 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
308 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
309 #ifdef RE_ENABLE_I18N |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
310 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
311 { |
6119
c3bf2ea44695
Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
312 unsigned char buf[MB_LEN_MAX]; |
c3bf2ea44695
Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
313 unsigned char *p; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
314 wchar_t wc; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
315 mbstate_t state; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
316 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
317 p = buf; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
318 *p++ = dfa->nodes[node].opr.c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
319 while (++node < dfa->nodes_len |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
320 && dfa->nodes[node].type == CHARACTER |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
321 && dfa->nodes[node].mb_partial) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
322 *p++ = dfa->nodes[node].opr.c; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
323 memset (&state, '\0', sizeof (state)); |
10998
cc7a1af3872f
regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents:
10078
diff
changeset
|
324 if (__mbrtowc (&wc, (const char *) buf, p - buf, |
cc7a1af3872f
regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents:
10078
diff
changeset
|
325 &state) == p - buf |
18093
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
326 && (__wcrtomb ((char *) buf, __towlower (wc), &state) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
327 != (size_t) -1)) |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
328 re_set_fastmap (fastmap, false, buf[0]); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
329 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
330 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
331 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
332 else if (type == SIMPLE_BRACKET) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
333 { |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
334 int i, ch; |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
335 for (i = 0, ch = 0; i < BITSET_WORDS; ++i) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
336 { |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
337 int j; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
338 bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
339 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
340 if (w & ((bitset_word_t) 1 << j)) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
341 re_set_fastmap (fastmap, icase, ch); |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
342 } |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
343 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
344 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
345 else if (type == COMPLEX_BRACKET) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
346 { |
11000
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
347 re_charset_t *cset = dfa->nodes[node].opr.mbcset; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
348 Idx i; |
11000
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
349 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
350 # ifdef _LIBC |
11000
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
351 /* See if we have to try all bytes which start multiple collation |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
352 elements. |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
353 e.g. In da_DK, we want to catch 'a' since "aa" is a valid |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
354 collation element, and don't catch 'b' since 'b' is |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
355 the only collation element which starts from 'b' (and |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
356 it is caught by SIMPLE_BRACKET). */ |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
357 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
358 && (cset->ncoll_syms || cset->nranges)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
359 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
360 const int32_t *table = (const int32_t *) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
362 for (i = 0; i < SBC_MAX; ++i) |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
363 if (table[i] < 0) |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
364 re_set_fastmap (fastmap, icase, i); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
365 } |
11000
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
366 # endif /* _LIBC */ |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
367 |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
368 /* See if we have to start the match at all multibyte characters, |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
369 i.e. where we would not find an invalid sequence. This only |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
370 applies to multibyte character sets; for single byte character |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
371 sets, the SIMPLE_BRACKET again suffices. */ |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
372 if (dfa->mb_cur_max > 1 |
12351
0b80ccdc9aa4
regex: Fix fastmap for multibyte character ranges.
Paolo Bonzini <bonzini@gnu.org>
parents:
11000
diff
changeset
|
373 && (cset->nchar_classes || cset->non_match || cset->nranges |
11000
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
374 # ifdef _LIBC |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
375 || cset->nequiv_classes |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
376 # endif /* _LIBC */ |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
377 )) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
378 { |
11000
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
379 unsigned char c = 0; |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
380 do |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
381 { |
11000
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
382 mbstate_t mbs; |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
383 memset (&mbs, 0, sizeof (mbs)); |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
384 if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
385 re_set_fastmap (fastmap, false, (int) c); |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
386 } |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
387 while (++c != 0); |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
388 } |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
389 |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
390 else |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
391 { |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
392 /* ... Else catch all bytes which can start the mbchars. */ |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
393 for (i = 0; i < cset->nmbchars; ++i) |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
394 { |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
395 char buf[256]; |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
396 mbstate_t state; |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
397 memset (&state, '\0', sizeof (state)); |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
398 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
399 re_set_fastmap (fastmap, icase, *(unsigned char *) buf); |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
400 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
401 { |
18093
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
402 if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state) |
11000
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
403 != (size_t) -1) |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
404 re_set_fastmap (fastmap, false, *(unsigned char *) buf); |
683b379e6760
regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents:
10998
diff
changeset
|
405 } |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
406 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
407 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
408 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
409 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
410 else if (type == OP_PERIOD |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
411 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
412 || type == OP_UTF8_PERIOD |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
413 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
414 || type == END_OF_RE) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
415 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
416 memset (fastmap, '\1', sizeof (char) * SBC_MAX); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
417 if (type == END_OF_RE) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
418 bufp->can_be_null = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
419 return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
420 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
421 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
422 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
423 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
424 /* Entry point for POSIX code. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
425 /* regcomp takes a regular expression as a string and compiles it. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
426 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
427 PREG is a regex_t *. We do not expect any fields to be initialized, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
428 since POSIX says we shouldn't. Thus, we set |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
429 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
430 'buffer' to the compiled pattern; |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
431 'used' to the length of the compiled pattern; |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
432 'syntax' to RE_SYNTAX_POSIX_EXTENDED if the |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
433 REG_EXTENDED bit in CFLAGS is set; otherwise, to |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
434 RE_SYNTAX_POSIX_BASIC; |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
435 'newline_anchor' to REG_NEWLINE being set in CFLAGS; |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
436 'fastmap' to an allocated space for the fastmap; |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
437 'fastmap_accurate' to zero; |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
438 're_nsub' to the number of subexpressions in PATTERN. |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
439 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
440 PATTERN is the address of the pattern string. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
441 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
442 CFLAGS is a series of bits which affect compilation. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
443 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
444 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
445 use POSIX basic syntax. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
446 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
447 If REG_NEWLINE is set, then . and [^...] don't match newline. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
448 Also, regexec will try a match beginning after every newline. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
449 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
450 If REG_ICASE is set, then we consider upper- and lowercase |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
451 versions of letters to be equivalent when matching. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
452 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
453 If REG_NOSUB is set, then when PREG is passed to regexec, that |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
454 routine will report only success or failure, and nothing about the |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
455 registers. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
456 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
457 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
458 the return codes and their meanings.) */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
459 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
460 int |
40051 | 461 regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
462 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
463 reg_errcode_t ret; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
464 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
465 : RE_SYNTAX_POSIX_BASIC); |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
466 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
467 preg->buffer = NULL; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
468 preg->allocated = 0; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
469 preg->used = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
470 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
471 /* Try to allocate space for the fastmap. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
472 preg->fastmap = re_malloc (char, SBC_MAX); |
39918 | 473 if (__glibc_unlikely (preg->fastmap == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
474 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
475 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
476 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
477 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
478 /* If REG_NEWLINE is set, newlines are treated differently. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
479 if (cflags & REG_NEWLINE) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
480 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
481 syntax &= ~RE_DOT_NEWLINE; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
482 syntax |= RE_HAT_LISTS_NOT_NEWLINE; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
483 /* It also changes the matching behavior. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
484 preg->newline_anchor = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
485 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
486 else |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
487 preg->newline_anchor = 0; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
488 preg->no_sub = !!(cflags & REG_NOSUB); |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
489 preg->translate = NULL; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
490 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
491 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
492 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
493 /* POSIX doesn't distinguish between an unmatched open-group and an |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
494 unmatched close-group: both are REG_EPAREN. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
495 if (ret == REG_ERPAREN) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
496 ret = REG_EPAREN; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
497 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
498 /* We have already checked preg->fastmap != NULL. */ |
39918 | 499 if (__glibc_likely (ret == REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
500 /* Compute the fastmap now, since regexec cannot modify the pattern |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
501 buffer. This function never fails in this implementation. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
502 (void) re_compile_fastmap (preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
503 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
504 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
505 /* Some error occurred while compiling the expression. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
506 re_free (preg->fastmap); |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
507 preg->fastmap = NULL; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
508 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
509 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
510 return (int) ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
511 } |
19445 | 512 libc_hidden_def (__regcomp) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
513 weak_alias (__regcomp, regcomp) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
514 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
515 /* Returns a message corresponding to an error code, ERRCODE, returned |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
516 from either regcomp or regexec. We don't use PREG here. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
517 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
518 size_t |
40051 | 519 regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf, |
18252
8162c20f4bc7
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18228
diff
changeset
|
520 size_t errbuf_size) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
521 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
522 const char *msg; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
523 size_t msg_size; |
39918 | 524 int nerrcodes = sizeof __re_error_msgid_idx / sizeof __re_error_msgid_idx[0]; |
525 | |
526 if (__glibc_unlikely (errcode < 0 || errcode >= nerrcodes)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
527 /* Only error codes returned by the rest of the code should be passed |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
528 to this routine. If we are given anything else, or if other regex |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
529 code generates an invalid error code, then the program has a bug. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
530 Dump core so we can fix it. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
531 abort (); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
532 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
533 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
534 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
535 msg_size = strlen (msg) + 1; /* Includes the null. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
536 |
39918 | 537 if (__glibc_likely (errbuf_size != 0)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
538 { |
8073
eaa00773406b
Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents:
8045
diff
changeset
|
539 size_t cpy_size = msg_size; |
39918 | 540 if (__glibc_unlikely (msg_size > errbuf_size)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
541 { |
8073
eaa00773406b
Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents:
8045
diff
changeset
|
542 cpy_size = errbuf_size - 1; |
eaa00773406b
Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents:
8045
diff
changeset
|
543 errbuf[cpy_size] = '\0'; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
544 } |
8073
eaa00773406b
Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents:
8045
diff
changeset
|
545 memcpy (errbuf, msg, cpy_size); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
546 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
547 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
548 return msg_size; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
549 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
550 weak_alias (__regerror, regerror) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
551 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
552 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
553 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
554 /* This static array is used for the map to single-byte characters when |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
555 UTF-8 is used. Otherwise we would allocate memory just to initialize |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
556 it the same all the time. UTF-8 is the preferred encoding so this is |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
557 a worthwhile optimization. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
558 static const bitset_t utf8_sb_map = |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
559 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
560 /* Set the first 128 bits. */ |
17412
6105f1dfb98e
c-ctype, regex, verify: port to gcc -std=c90 -pedantic
Paul Eggert <eggert@cs.ucla.edu>
parents:
17408
diff
changeset
|
561 # if defined __GNUC__ && !defined __STRICT_ANSI__ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
562 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
563 # else |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
564 # if 4 * BITSET_WORD_BITS < ASCII_CHARS |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
565 # error "bitset_word_t is narrower than 32 bits" |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
566 # elif 3 * BITSET_WORD_BITS < ASCII_CHARS |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
567 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
568 # elif 2 * BITSET_WORD_BITS < ASCII_CHARS |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
569 BITSET_WORD_MAX, BITSET_WORD_MAX, |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
570 # elif 1 * BITSET_WORD_BITS < ASCII_CHARS |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
571 BITSET_WORD_MAX, |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
572 # endif |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
573 (BITSET_WORD_MAX |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
574 >> (SBC_MAX % BITSET_WORD_BITS == 0 |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
575 ? 0 |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
576 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
577 # endif |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
578 }; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
579 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
580 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
581 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
582 static void |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
583 free_dfa_content (re_dfa_t *dfa) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
584 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
585 Idx i, j; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
586 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
587 if (dfa->nodes) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
588 for (i = 0; i < dfa->nodes_len; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
589 free_token (dfa->nodes + i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
590 re_free (dfa->nexts); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
591 for (i = 0; i < dfa->nodes_len; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
592 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
593 if (dfa->eclosures != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
594 re_node_set_free (dfa->eclosures + i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
595 if (dfa->inveclosures != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
596 re_node_set_free (dfa->inveclosures + i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
597 if (dfa->edests != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
598 re_node_set_free (dfa->edests + i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
599 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
600 re_free (dfa->edests); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
601 re_free (dfa->eclosures); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
602 re_free (dfa->inveclosures); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
603 re_free (dfa->nodes); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
604 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
605 if (dfa->state_table) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
606 for (i = 0; i <= dfa->state_hash_mask; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
607 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
608 struct re_state_table_entry *entry = dfa->state_table + i; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
609 for (j = 0; j < entry->num; ++j) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
610 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
611 re_dfastate_t *state = entry->array[j]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
612 free_state (state); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
613 } |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
614 re_free (entry->array); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
615 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
616 re_free (dfa->state_table); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
617 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
618 if (dfa->sb_char != utf8_sb_map) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
619 re_free (dfa->sb_char); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
620 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
621 re_free (dfa->subexp_map); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
622 #ifdef DEBUG |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
623 re_free (dfa->re_str); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
624 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
625 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
626 re_free (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
627 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
628 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
629 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
630 /* Free dynamically allocated space used by PREG. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
631 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
632 void |
18252
8162c20f4bc7
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18228
diff
changeset
|
633 regfree (regex_t *preg) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
634 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
635 re_dfa_t *dfa = preg->buffer; |
39918 | 636 if (__glibc_likely (dfa != NULL)) |
17408
020c917cba9d
regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents:
17346
diff
changeset
|
637 { |
020c917cba9d
regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents:
17346
diff
changeset
|
638 lock_fini (dfa->lock); |
020c917cba9d
regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents:
17346
diff
changeset
|
639 free_dfa_content (dfa); |
020c917cba9d
regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents:
17346
diff
changeset
|
640 } |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
641 preg->buffer = NULL; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
642 preg->allocated = 0; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
643 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
644 re_free (preg->fastmap); |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
645 preg->fastmap = NULL; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
646 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
647 re_free (preg->translate); |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
648 preg->translate = NULL; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
649 } |
19445 | 650 libc_hidden_def (__regfree) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
651 weak_alias (__regfree, regfree) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
652 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
653 /* Entry points compatible with 4.2 BSD regex library. We don't define |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
654 them unless specifically requested. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
655 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
656 #if defined _REGEX_RE_COMP || defined _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
657 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
658 /* BSD has one and only one pattern buffer. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
659 static struct re_pattern_buffer re_comp_buf; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
660 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
661 char * |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
662 # ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
663 /* Make these definitions weak in libc, so POSIX programs can redefine |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
664 these names if they don't use our functions, and still use |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
665 regcomp/regexec above without link errors. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
666 weak_function |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
667 # endif |
18252
8162c20f4bc7
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18228
diff
changeset
|
668 re_comp (const char *s) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
669 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
670 reg_errcode_t ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
671 char *fastmap; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
672 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
673 if (!s) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
674 { |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
675 if (!re_comp_buf.buffer) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
676 return gettext ("No previous regular expression"); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
677 return 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
678 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
679 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
680 if (re_comp_buf.buffer) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
681 { |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
682 fastmap = re_comp_buf.fastmap; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
683 re_comp_buf.fastmap = NULL; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
684 __regfree (&re_comp_buf); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
685 memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
686 re_comp_buf.fastmap = fastmap; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
687 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
688 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
689 if (re_comp_buf.fastmap == NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
690 { |
19476
1f3bb9a8c477
regex: use re_malloc etc. consistently
Paul Eggert <eggert@cs.ucla.edu>
parents:
19446
diff
changeset
|
691 re_comp_buf.fastmap = re_malloc (char, SBC_MAX); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
692 if (re_comp_buf.fastmap == NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
693 return (char *) gettext (__re_error_msgid |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
694 + __re_error_msgid_idx[(int) REG_ESPACE]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
695 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
696 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
697 /* Since 're_exec' always passes NULL for the 'regs' argument, we |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
698 don't need to initialize the pattern buffer fields which affect it. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
699 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
700 /* Match anchors at newlines. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
701 re_comp_buf.newline_anchor = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
702 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
703 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
704 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
705 if (!ret) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
706 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
707 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
708 /* Yes, we're discarding 'const' here if !HAVE_LIBINTL. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
709 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
710 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
711 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
#ifdef _LIBC
/* Cleanup routine defined through libc_freeres_fn (libc builds only):
   frees whatever the static re_comp pattern buffer still owns.  */
libc_freeres_fn (free_mem)
{
  __regfree (&re_comp_buf);
}
#endif
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
718 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
719 #endif /* _REGEX_RE_COMP */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
720 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
721 /* Internal entry point. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
722 Compile the regular expression PATTERN, whose length is LENGTH. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
723 SYNTAX indicate regular expression's syntax. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
724 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
725 static reg_errcode_t |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
726 re_compile_internal (regex_t *preg, const char * pattern, size_t length, |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
727 reg_syntax_t syntax) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
728 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
729 reg_errcode_t err = REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
730 re_dfa_t *dfa; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
731 re_string_t regexp; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
732 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
733 /* Initialize the pattern buffer. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
734 preg->fastmap_accurate = 0; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
735 preg->syntax = syntax; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
736 preg->not_bol = preg->not_eol = 0; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
737 preg->used = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
738 preg->re_nsub = 0; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
739 preg->can_be_null = 0; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
740 preg->regs_allocated = REGS_UNALLOCATED; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
741 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
742 /* Initialize the dfa. */ |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
743 dfa = preg->buffer; |
39918 | 744 if (__glibc_unlikely (preg->allocated < sizeof (re_dfa_t))) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
745 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
746 /* If zero allocated, but buffer is non-null, try to realloc |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
747 enough space. This loses if buffer's address is bogus, but |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
748 that is the user's responsibility. If ->buffer is NULL this |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
749 is a simple allocation. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
750 dfa = re_realloc (preg->buffer, re_dfa_t, 1); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
751 if (dfa == NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
752 return REG_ESPACE; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
753 preg->allocated = sizeof (re_dfa_t); |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
754 preg->buffer = dfa; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
755 } |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
756 preg->used = sizeof (re_dfa_t); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
757 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
758 err = init_dfa (dfa, length); |
39918 | 759 if (__glibc_unlikely (err == REG_NOERROR && lock_init (dfa->lock) != 0)) |
17408
020c917cba9d
regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents:
17346
diff
changeset
|
760 err = REG_ESPACE; |
39918 | 761 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
762 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
763 free_dfa_content (dfa); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
764 preg->buffer = NULL; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
765 preg->allocated = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
766 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
767 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
768 #ifdef DEBUG |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
769 /* Note: length+1 will not overflow since it is checked in init_dfa. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
770 dfa->re_str = re_malloc (char, length + 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
771 strncpy (dfa->re_str, pattern, length + 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
772 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
773 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
774 err = re_string_construct (®exp, pattern, length, preg->translate, |
10078
f47c913858de
Fix violation of <stdbool.h> replacement in regex.
Eric Blake <ebb9@byu.net>
parents:
10075
diff
changeset
|
775 (syntax & RE_ICASE) != 0, dfa); |
39918 | 776 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
777 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
778 re_compile_internal_free_return: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
779 free_workarea_compile (preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
780 re_string_destruct (®exp); |
17408
020c917cba9d
regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents:
17346
diff
changeset
|
781 lock_fini (dfa->lock); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
782 free_dfa_content (dfa); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
783 preg->buffer = NULL; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
784 preg->allocated = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
785 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
786 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
787 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
788 /* Parse the regular expression, and build a structure tree. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
789 preg->re_nsub = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
790 dfa->str_tree = parse (®exp, preg, syntax, &err); |
39918 | 791 if (__glibc_unlikely (dfa->str_tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
792 goto re_compile_internal_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
793 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
794 /* Analyze the tree and create the nfa. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
795 err = analyze (preg); |
39918 | 796 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
797 goto re_compile_internal_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
798 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
799 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
800 /* If possible, do searching in single byte encoding to speed things up. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
801 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
802 optimize_utf8 (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
803 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
804 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
805 /* Then create the initial state of the dfa. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
806 err = create_initial_state (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
807 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
808 /* Release work areas. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
809 free_workarea_compile (preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
810 re_string_destruct (®exp); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
811 |
39918 | 812 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
813 { |
17408
020c917cba9d
regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents:
17346
diff
changeset
|
814 lock_fini (dfa->lock); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
815 free_dfa_content (dfa); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
816 preg->buffer = NULL; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
817 preg->allocated = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
818 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
819 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
820 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
821 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
822 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
823 /* Initialize DFA. We use the length of the regular expression PAT_LEN |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
824 as the initial length of some arrays. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
825 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
826 static reg_errcode_t |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
827 init_dfa (re_dfa_t *dfa, size_t pat_len) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
828 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
829 __re_size_t table_size; |
12567
ceb1562f60a5
regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents:
12559
diff
changeset
|
830 #ifndef _LIBC |
16730
d4cc21bf38ab
regex: pacify GCC when compiling GRUB
Paul Eggert <eggert@cs.ucla.edu>
parents:
16705
diff
changeset
|
831 const char *codeset_name; |
12567
ceb1562f60a5
regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents:
12559
diff
changeset
|
832 #endif |
6733
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6729
diff
changeset
|
833 #ifdef RE_ENABLE_I18N |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6729
diff
changeset
|
834 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6729
diff
changeset
|
835 #else |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6729
diff
changeset
|
836 size_t max_i18n_object_size = 0; |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6729
diff
changeset
|
837 #endif |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
838 size_t max_object_size = |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
839 MAX (sizeof (struct re_state_table_entry), |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
840 MAX (sizeof (re_token_t), |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
841 MAX (sizeof (re_node_set), |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
842 MAX (sizeof (regmatch_t), |
6733
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6729
diff
changeset
|
843 max_i18n_object_size)))); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
844 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
845 memset (dfa, '\0', sizeof (re_dfa_t)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
846 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
847 /* Force allocation of str_tree_storage the first time. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
848 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
849 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
850 /* Avoid overflows. The extra "/ 2" is for the table_size doubling |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
851 calculation below, and for similar doubling calculations |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
852 elsewhere. And it's <= rather than <, because some of the |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
853 doubling calculations add 1 afterwards. */ |
39918 | 854 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 |
855 <= pat_len)) | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
856 return REG_ESPACE; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
857 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
858 dfa->nodes_alloc = pat_len + 1; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
859 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
860 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
861 /* table_size = the smallest power of 2 greater than pat_len */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
862 for (table_size = 1; ; table_size <<= 1) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
863 if (table_size > pat_len) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
864 break; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
865 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
866 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
867 dfa->state_hash_mask = table_size - 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
868 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
869 dfa->mb_cur_max = MB_CUR_MAX; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
870 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
871 if (dfa->mb_cur_max == 6 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
872 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
873 dfa->is_utf8 = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
874 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
875 != 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
876 #else |
12567
ceb1562f60a5
regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents:
12559
diff
changeset
|
877 codeset_name = nl_langinfo (CODESET); |
16950
87796549f866
regex: use locale-independent comparison for codeset name
Paul Eggert <eggert@cs.ucla.edu>
parents:
16912
diff
changeset
|
878 if ((codeset_name[0] == 'U' || codeset_name[0] == 'u') |
87796549f866
regex: use locale-independent comparison for codeset name
Paul Eggert <eggert@cs.ucla.edu>
parents:
16912
diff
changeset
|
879 && (codeset_name[1] == 'T' || codeset_name[1] == 't') |
87796549f866
regex: use locale-independent comparison for codeset name
Paul Eggert <eggert@cs.ucla.edu>
parents:
16912
diff
changeset
|
880 && (codeset_name[2] == 'F' || codeset_name[2] == 'f') |
87796549f866
regex: use locale-independent comparison for codeset name
Paul Eggert <eggert@cs.ucla.edu>
parents:
16912
diff
changeset
|
881 && strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
882 dfa->is_utf8 = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
883 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
884 /* We check exhaustively in the loop below if this charset is a |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
885 superset of ASCII. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
886 dfa->map_notascii = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
887 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
888 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
889 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
890 if (dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
891 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
892 if (dfa->is_utf8) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
893 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
894 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
895 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
896 int i, j, ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
897 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
898 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); |
39918 | 899 if (__glibc_unlikely (dfa->sb_char == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
900 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
901 |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
902 /* Set the bits corresponding to single byte chars. */ |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
903 for (i = 0, ch = 0; i < BITSET_WORDS; ++i) |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
904 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
905 { |
5972
aa260da0bbbe
* config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents:
5968
diff
changeset
|
906 wint_t wch = __btowc (ch); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
907 if (wch != WEOF) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
908 dfa->sb_char[i] |= (bitset_word_t) 1 << j; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
909 # ifndef _LIBC |
5972
aa260da0bbbe
* config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents:
5968
diff
changeset
|
910 if (isascii (ch) && wch != ch) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
911 dfa->map_notascii = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
912 # endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
913 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
914 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
915 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
916 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
917 |
39918 | 918 if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
919 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
920 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
921 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
922 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
923 /* Initialize WORD_CHAR table, which indicates which character is |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
924 "word". In this case "word" means that it is the word construction |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
925 character used by some operators like "\<", "\>", etc. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
926 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
927 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
928 init_word_char (re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
929 { |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
930 int i = 0; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
931 int j; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
932 int ch = 0; |
17234 | 933 dfa->word_ops_used = 1; |
39918 | 934 if (__glibc_likely (dfa->map_notascii == 0)) |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
935 { |
19446 | 936 /* Avoid uint32_t and uint64_t as some non-GCC platforms lack |
937 them, an issue when this code is used in Gnulib. */ | |
16882
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
938 bitset_word_t bits0 = 0x00000000; |
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
939 bitset_word_t bits1 = 0x03ff0000; |
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
940 bitset_word_t bits2 = 0x87fffffe; |
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
941 bitset_word_t bits3 = 0x07fffffe; |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
942 if (BITSET_WORD_BITS == 64) |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
943 { |
19446 | 944 /* Pacify gcc -Woverflow on 32-bit platforms. */ |
16882
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
945 dfa->word_char[0] = bits1 << 31 << 1 | bits0; |
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
946 dfa->word_char[1] = bits3 << 31 << 1 | bits2; |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
947 i = 2; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
948 } |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
949 else if (BITSET_WORD_BITS == 32) |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
950 { |
16882
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
951 dfa->word_char[0] = bits0; |
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
952 dfa->word_char[1] = bits1; |
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
953 dfa->word_char[2] = bits2; |
551fb0402288
regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents:
16770
diff
changeset
|
954 dfa->word_char[3] = bits3; |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
955 i = 4; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
956 } |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
957 else |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
958 goto general_case; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
959 ch = 128; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
960 |
39918 | 961 if (__glibc_likely (dfa->is_utf8)) |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
962 { |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
963 memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8); |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
964 return; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
965 } |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
966 } |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
967 |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
968 general_case: |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
969 for (; i < BITSET_WORDS; ++i) |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
970 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
971 if (isalnum (ch) || ch == '_') |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
972 dfa->word_char[i] |= (bitset_word_t) 1 << j; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
973 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
974 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
975 /* Free the work areas which are only used while compiling. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
976 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
977 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
978 free_workarea_compile (regex_t *preg) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
979 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
980 re_dfa_t *dfa = preg->buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
981 bin_tree_storage_t *storage, *next; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
982 for (storage = dfa->str_tree_storage; storage; storage = next) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
983 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
984 next = storage->next; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
985 re_free (storage); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
986 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
987 dfa->str_tree_storage = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
988 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
989 dfa->str_tree = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
990 re_free (dfa->org_indices); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
991 dfa->org_indices = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
992 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
993 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
994 /* Create initial states for all contexts. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
995 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
996 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
997 create_initial_state (re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
998 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
999 Idx first, i; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1000 reg_errcode_t err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1001 re_node_set init_nodes; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1002 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1003 /* Initial states have the epsilon closure of the node which is |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1004 the first node of the regular expression. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1005 first = dfa->str_tree->first->node_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1006 dfa->init_node = first; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1007 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); |
39918 | 1008 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1009 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1010 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1011 /* The back-references which are in initial states can epsilon transit, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1012 since in this case all of the subexpressions can be null. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1013 Then we add epsilon closures of the nodes which are the next nodes of |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1014 the back-references. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1015 if (dfa->nbackref > 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1016 for (i = 0; i < init_nodes.nelem; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1017 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1018 Idx node_idx = init_nodes.elems[i]; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1019 re_token_type_t type = dfa->nodes[node_idx].type; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1020 |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1021 Idx clexp_idx; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1022 if (type != OP_BACK_REF) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1023 continue; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1024 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1025 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1026 re_token_t *clexp_node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1027 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1028 if (clexp_node->type == OP_CLOSE_SUBEXP |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1029 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1030 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1031 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1032 if (clexp_idx == init_nodes.nelem) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1033 continue; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1034 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1035 if (type == OP_BACK_REF) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1036 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1037 Idx dest_idx = dfa->edests[node_idx].elems[0]; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1038 if (!re_node_set_contains (&init_nodes, dest_idx)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1039 { |
12847
64dad3a0ba71
regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents:
12831
diff
changeset
|
1040 reg_errcode_t merge_err |
64dad3a0ba71
regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents:
12831
diff
changeset
|
1041 = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); |
64dad3a0ba71
regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents:
12831
diff
changeset
|
1042 if (merge_err != REG_NOERROR) |
64dad3a0ba71
regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents:
12831
diff
changeset
|
1043 return merge_err; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1044 i = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1045 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1046 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1047 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1048 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1049 /* It must be the first time to invoke acquire_state. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1050 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1051 /* We don't check ERR here, since the initial state must not be NULL. */ |
39918 | 1052 if (__glibc_unlikely (dfa->init_state == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1053 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1054 if (dfa->init_state->has_constraint) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1055 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1056 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1057 CONTEXT_WORD); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1058 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1059 CONTEXT_NEWLINE); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1060 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1061 &init_nodes, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1062 CONTEXT_NEWLINE |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1063 | CONTEXT_BEGBUF); |
39918 | 1064 if (__glibc_unlikely (dfa->init_state_word == NULL |
1065 || dfa->init_state_nl == NULL | |
1066 || dfa->init_state_begbuf == NULL)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1067 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1068 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1069 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1070 dfa->init_state_word = dfa->init_state_nl |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1071 = dfa->init_state_begbuf = dfa->init_state; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1072 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1073 re_node_set_free (&init_nodes); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1074 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1075 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1076 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1077 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1078 /* If it is possible to do searching in single byte encoding instead of UTF-8 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1079 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1080 DFA nodes where needed. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1081 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1082 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1083 optimize_utf8 (re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1084 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1085 Idx node; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1086 int i; |
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1087 bool mb_chars = false; |
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1088 bool has_period = false; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1089 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1090 for (node = 0; node < dfa->nodes_len; ++node) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1091 switch (dfa->nodes[node].type) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1092 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1093 case CHARACTER: |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1094 if (dfa->nodes[node].opr.c >= ASCII_CHARS) |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1095 mb_chars = true; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1096 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1097 case ANCHOR: |
9494
7cd817e07a16
Fix a 4-year-old used-uninitialized bug in regcomp.c.
Jim Meyering <meyering@redhat.com>
parents:
8153
diff
changeset
|
1098 switch (dfa->nodes[node].opr.ctx_type) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1099 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1100 case LINE_FIRST: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1101 case LINE_LAST: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1102 case BUF_FIRST: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1103 case BUF_LAST: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1104 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1105 default: |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1106 /* Word anchors etc. cannot be handled. It's okay to test |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1107 opr.ctx_type since constraints (for all DFA nodes) are |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1108 created by ORing one or more opr.ctx_type values. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1109 return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1110 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1111 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1112 case OP_PERIOD: |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1113 has_period = true; |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1114 break; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1115 case OP_BACK_REF: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1116 case OP_ALT: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1117 case END_OF_RE: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1118 case OP_DUP_ASTERISK: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1119 case OP_OPEN_SUBEXP: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1120 case OP_CLOSE_SUBEXP: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1121 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1122 case COMPLEX_BRACKET: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1123 return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1124 case SIMPLE_BRACKET: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1125 /* Just double check. */ |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
1126 { |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1127 int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1128 ? 0 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1129 : BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS); |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1130 for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
1131 { |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
1132 if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0) |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
1133 return; |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
1134 rshift = 0; |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
1135 } |
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
1136 } |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1137 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1138 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1139 abort (); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1140 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1141 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1142 if (mb_chars || has_period) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1143 for (node = 0; node < dfa->nodes_len; ++node) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1144 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1145 if (dfa->nodes[node].type == CHARACTER |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1146 && dfa->nodes[node].opr.c >= ASCII_CHARS) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1147 dfa->nodes[node].mb_partial = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1148 else if (dfa->nodes[node].type == OP_PERIOD) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1149 dfa->nodes[node].type = OP_UTF8_PERIOD; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1150 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1151 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1152 /* The search can be in single byte locale. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1153 dfa->mb_cur_max = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1154 dfa->is_utf8 = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1155 dfa->has_mb_node = dfa->nbackref > 0 || has_period; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1156 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1157 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1158 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1159 /* Analyze the structure tree, and calculate "first", "next", "edest", |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1160 "eclosure", and "inveclosure". */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1161 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1162 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1163 analyze (regex_t *preg) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1164 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
1165 re_dfa_t *dfa = preg->buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1166 reg_errcode_t ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1167 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1168 /* Allocate arrays. */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1169 dfa->nexts = re_malloc (Idx, dfa->nodes_alloc); |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1170 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1171 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1172 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); |
39918 | 1173 if (__glibc_unlikely (dfa->nexts == NULL || dfa->org_indices == NULL |
1174 || dfa->edests == NULL || dfa->eclosures == NULL)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1175 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1176 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1177 dfa->subexp_map = re_malloc (Idx, preg->re_nsub); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1178 if (dfa->subexp_map != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1179 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1180 Idx i; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1181 for (i = 0; i < preg->re_nsub; i++) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1182 dfa->subexp_map[i] = i; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1183 preorder (dfa->str_tree, optimize_subexps, dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1184 for (i = 0; i < preg->re_nsub; i++) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1185 if (dfa->subexp_map[i] != i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1186 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1187 if (i == preg->re_nsub) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1188 { |
19476
1f3bb9a8c477
regex: use re_malloc etc. consistently
Paul Eggert <eggert@cs.ucla.edu>
parents:
19446
diff
changeset
|
1189 re_free (dfa->subexp_map); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1190 dfa->subexp_map = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1191 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1192 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1193 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1194 ret = postorder (dfa->str_tree, lower_subexps, preg); |
39918 | 1195 if (__glibc_unlikely (ret != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1196 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1197 ret = postorder (dfa->str_tree, calc_first, dfa); |
39918 | 1198 if (__glibc_unlikely (ret != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1199 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1200 preorder (dfa->str_tree, calc_next, dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1201 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); |
39918 | 1202 if (__glibc_unlikely (ret != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1203 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1204 ret = calc_eclosure (dfa); |
39918 | 1205 if (__glibc_unlikely (ret != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1206 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1207 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1208 /* We only need this during the prune_impossible_nodes pass in regexec.c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1209 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1210 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1211 || dfa->nbackref) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1212 { |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1213 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); |
39918 | 1214 if (__glibc_unlikely (dfa->inveclosures == NULL)) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1215 return REG_ESPACE; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1216 ret = calc_inveclosure (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1217 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1218 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1219 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1220 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1221 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1222 /* Our parse trees are very unbalanced, so we cannot use a stack to |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1223 implement parse tree visits. Instead, we use parent pointers and |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1224 some hairy code in these two functions. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1225 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1226 postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1227 void *extra) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1228 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1229 bin_tree_t *node, *prev; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1230 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1231 for (node = root; ; ) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1232 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1233 /* Descend down the tree, preferably to the left (or to the right |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1234 if that's the only child). */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1235 while (node->left || node->right) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1236 if (node->left) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1237 node = node->left; |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1238 else |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1239 node = node->right; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1240 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1241 do |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1242 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1243 reg_errcode_t err = fn (extra, node); |
39918 | 1244 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1245 return err; |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1246 if (node->parent == NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1247 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1248 prev = node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1249 node = node->parent; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1250 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1251 /* Go up while we have a node that is reached from the right. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1252 while (node->right == prev || node->right == NULL); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1253 node = node->right; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1254 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1255 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1256 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1257 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1258 preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1259 void *extra) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1260 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1261 bin_tree_t *node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1262 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1263 for (node = root; ; ) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1264 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1265 reg_errcode_t err = fn (extra, node); |
39918 | 1266 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1267 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1268 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1269 /* Go to the left node, or up and to the right. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1270 if (node->left) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1271 node = node->left; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1272 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1273 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1274 bin_tree_t *prev = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1275 while (node->right == prev || node->right == NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1276 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1277 prev = node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1278 node = node->parent; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1279 if (!node) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1280 return REG_NOERROR; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1281 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1282 node = node->right; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1283 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1284 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1285 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1286 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1287 /* Optimization pass: if a SUBEXP is entirely contained, strip it and tell |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1288 re_search_internal to map the inner one's opr.idx to this one's. Adjust |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1289 backreferences as well. Requires a preorder visit. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1290 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1291 optimize_subexps (void *extra, bin_tree_t *node) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1292 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1293 re_dfa_t *dfa = (re_dfa_t *) extra; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1294 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1295 if (node->token.type == OP_BACK_REF && dfa->subexp_map) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1296 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1297 int idx = node->token.opr.idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1298 node->token.opr.idx = dfa->subexp_map[idx]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1299 dfa->used_bkref_map |= 1 << node->token.opr.idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1300 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1301 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1302 else if (node->token.type == SUBEXP |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1303 && node->left && node->left->token.type == SUBEXP) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1304 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1305 Idx other_idx = node->left->token.opr.idx; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1306 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1307 node->left = node->left->left; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1308 if (node->left) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1309 node->left->parent = node; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1310 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1311 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
1312 if (other_idx < BITSET_WORD_BITS) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1313 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1314 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1315 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1316 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1317 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1318 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1319 /* Lowering pass: Turn each SUBEXP node into the appropriate concatenation |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1320 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1321 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1322 lower_subexps (void *extra, bin_tree_t *node) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1323 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1324 regex_t *preg = (regex_t *) extra; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1325 reg_errcode_t err = REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1326 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1327 if (node->left && node->left->token.type == SUBEXP) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1328 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1329 node->left = lower_subexp (&err, preg, node->left); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1330 if (node->left) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1331 node->left->parent = node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1332 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1333 if (node->right && node->right->token.type == SUBEXP) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1334 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1335 node->right = lower_subexp (&err, preg, node->right); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1336 if (node->right) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1337 node->right->parent = node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1338 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1339 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1340 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1341 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1342 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1343 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1344 lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1345 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
1346 re_dfa_t *dfa = preg->buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1347 bin_tree_t *body = node->left; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1348 bin_tree_t *op, *cls, *tree1, *tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1349 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1350 if (preg->no_sub |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1351 /* We do not optimize empty subexpressions, because otherwise we may |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1352 have bad CONCAT nodes with NULL children. This is obviously not |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1353 very common, so we do not lose much. An example that triggers |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1354 this case is the sed "script" /\(\)/x. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1355 && node->left != NULL |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1356 && (node->token.opr.idx >= BITSET_WORD_BITS |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1357 || !(dfa->used_bkref_map |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1358 & ((bitset_word_t) 1 << node->token.opr.idx)))) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1359 return node->left; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1360 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1361 /* Convert the SUBEXP node to the concatenation of an |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1362 OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1363 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1364 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1365 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1366 tree = create_tree (dfa, op, tree1, CONCAT); |
39918 | 1367 if (__glibc_unlikely (tree == NULL || tree1 == NULL |
1368 || op == NULL || cls == NULL)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1369 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1370 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1371 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1372 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1373 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1374 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1375 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1376 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1377 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1378 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1379 /* Pass 1 in building the NFA: compute FIRST and create unlinked automaton |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1380 nodes. Requires a postorder visit. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1381 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1382 calc_first (void *extra, bin_tree_t *node) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1383 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1384 re_dfa_t *dfa = (re_dfa_t *) extra; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1385 if (node->token.type == CONCAT) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1386 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1387 node->first = node->left->first; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1388 node->node_idx = node->left->node_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1389 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1390 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1391 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1392 node->first = node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1393 node->node_idx = re_dfa_add_node (dfa, node->token); |
39918 | 1394 if (__glibc_unlikely (node->node_idx == -1)) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1395 return REG_ESPACE; |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1396 if (node->token.type == ANCHOR) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1397 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1398 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1399 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1400 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1401 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1402 /* Pass 2: compute NEXT on the tree. Preorder visit. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1403 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1404 calc_next (void *extra, bin_tree_t *node) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1405 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1406 switch (node->token.type) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1407 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1408 case OP_DUP_ASTERISK: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1409 node->left->next = node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1410 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1411 case CONCAT: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1412 node->left->next = node->right->first; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1413 node->right->next = node->next; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1414 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1415 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1416 if (node->left) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1417 node->left->next = node->next; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1418 if (node->right) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
1419 node->right->next = node->next; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1420 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1421 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1422 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1423 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1424 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1425 /* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1426 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1427 link_nfa_nodes (void *extra, bin_tree_t *node) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1428 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1429 re_dfa_t *dfa = (re_dfa_t *) extra; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1430 Idx idx = node->node_idx; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1431 reg_errcode_t err = REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1432 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1433 switch (node->token.type) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1434 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1435 case CONCAT: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1436 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1437 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1438 case END_OF_RE: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1439 assert (node->next == NULL); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1440 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1441 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1442 case OP_DUP_ASTERISK: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1443 case OP_ALT: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1444 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1445 Idx left, right; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1446 dfa->has_plural_match = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1447 if (node->left != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1448 left = node->left->first->node_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1449 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1450 left = node->next->node_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1451 if (node->right != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1452 right = node->right->first->node_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1453 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1454 right = node->next->node_idx; |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
1455 assert (left > -1); |
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
1456 assert (right > -1); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1457 err = re_node_set_init_2 (dfa->edests + idx, left, right); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1458 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1459 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1460 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1461 case ANCHOR: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1462 case OP_OPEN_SUBEXP: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1463 case OP_CLOSE_SUBEXP: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1464 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1465 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1466 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1467 case OP_BACK_REF: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1468 dfa->nexts[idx] = node->next->node_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1469 if (node->token.type == OP_BACK_REF) |
12829
971957a253f8
regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents:
12572
diff
changeset
|
1470 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1471 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1472 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1473 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1474 assert (!IS_EPSILON_NODE (node->token.type)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1475 dfa->nexts[idx] = node->next->node_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1476 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1477 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1478 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1479 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1480 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1481 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1482 /* Duplicate the epsilon closure of the node ROOT_NODE. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1483 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1484 to their own constraint. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1485 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1486 static reg_errcode_t |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1487 duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1488 Idx root_node, unsigned int init_constraint) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1489 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1490 Idx org_node, clone_node; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1491 bool ok; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1492 unsigned int constraint = init_constraint; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1493 for (org_node = top_org_node, clone_node = top_clone_node;;) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1494 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1495 Idx org_dest, clone_dest; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1496 if (dfa->nodes[org_node].type == OP_BACK_REF) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1497 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1498 /* If the back reference epsilon-transit, its destination must |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1499 also have the constraint. Then duplicate the epsilon closure |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1500 of the destination of the back reference, and store it in |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1501 edests of the back reference. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1502 org_dest = dfa->nexts[org_node]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1503 re_node_set_empty (dfa->edests + clone_node); |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1504 clone_dest = duplicate_node (dfa, org_dest, constraint); |
39918 | 1505 if (__glibc_unlikely (clone_dest == -1)) |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1506 return REG_ESPACE; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1507 dfa->nexts[clone_node] = dfa->nexts[org_node]; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1508 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); |
39918 | 1509 if (__glibc_unlikely (! ok)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1510 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1511 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1512 else if (dfa->edests[org_node].nelem == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1513 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1514 /* In case of the node can't epsilon-transit, don't duplicate the |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1515 destination and store the original destination as the |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1516 destination of the node. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1517 dfa->nexts[clone_node] = dfa->nexts[org_node]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1518 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1519 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1520 else if (dfa->edests[org_node].nelem == 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1521 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1522 /* In case of the node can epsilon-transit, and it has only one |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1523 destination. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1524 org_dest = dfa->edests[org_node].elems[0]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1525 re_node_set_empty (dfa->edests + clone_node); |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1526 /* If the node is root_node itself, it means the epsilon closure |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1527 has a loop. Then tie it to the destination of the root_node. */ |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1528 if (org_node == root_node && clone_node != org_node) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1529 { |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1530 ok = re_node_set_insert (dfa->edests + clone_node, org_dest); |
39918 | 1531 if (__glibc_unlikely (! ok)) |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1532 return REG_ESPACE; |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1533 break; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1534 } |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1535 /* In case the node has another constraint, append it. */ |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1536 constraint |= dfa->nodes[org_node].constraint; |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1537 clone_dest = duplicate_node (dfa, org_dest, constraint); |
39918 | 1538 if (__glibc_unlikely (clone_dest == -1)) |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1539 return REG_ESPACE; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1540 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); |
39918 | 1541 if (__glibc_unlikely (! ok)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1542 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1543 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1544 else /* dfa->edests[org_node].nelem == 2 */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1545 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1546 /* In case of the node can epsilon-transit, and it has two |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1547 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1548 org_dest = dfa->edests[org_node].elems[0]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1549 re_node_set_empty (dfa->edests + clone_node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1550 /* Search for a duplicated node which satisfies the constraint. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1551 clone_dest = search_duplicated_node (dfa, org_dest, constraint); |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
1552 if (clone_dest == -1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1553 { |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1554 /* There is no such duplicated node, create a new one. */ |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1555 reg_errcode_t err; |
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1556 clone_dest = duplicate_node (dfa, org_dest, constraint); |
39918 | 1557 if (__glibc_unlikely (clone_dest == -1)) |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1558 return REG_ESPACE; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1559 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); |
39918 | 1560 if (__glibc_unlikely (! ok)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1561 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1562 err = duplicate_node_closure (dfa, org_dest, clone_dest, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1563 root_node, constraint); |
39918 | 1564 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1565 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1566 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1567 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1568 { |
12569
51ea446bf1f8
regcomp: fix typo in comment
Jim Meyering <meyering@redhat.com>
parents:
12568
diff
changeset
|
1569 /* There is a duplicated node which satisfies the constraint, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1570 use it to avoid infinite loop. */ |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1571 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); |
39918 | 1572 if (__glibc_unlikely (! ok)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1573 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1574 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1575 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1576 org_dest = dfa->edests[org_node].elems[1]; |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1577 clone_dest = duplicate_node (dfa, org_dest, constraint); |
39918 | 1578 if (__glibc_unlikely (clone_dest == -1)) |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1579 return REG_ESPACE; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1580 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); |
39918 | 1581 if (__glibc_unlikely (! ok)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1582 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1583 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1584 org_node = org_dest; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1585 clone_node = clone_dest; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1586 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1587 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1588 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1589 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1590 /* Search for a node which is duplicated from the node ORG_NODE, and |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1591 satisfies the constraint CONSTRAINT. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1592 |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1593 static Idx |
6185
6b09f7f6ba73
* lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents:
6184
diff
changeset
|
1594 search_duplicated_node (const re_dfa_t *dfa, Idx org_node, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1595 unsigned int constraint) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1596 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1597 Idx idx; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1598 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1599 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1600 if (org_node == dfa->org_indices[idx] |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1601 && constraint == dfa->nodes[idx].constraint) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1602 return idx; /* Found. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1603 } |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
1604 return -1; /* Not found. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1605 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1606 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1607 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
1608 Return the index of the new node, or -1 if insufficient storage is |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1609 available. */ |
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1610 |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1611 static Idx |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1612 duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1613 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1614 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); |
39918 | 1615 if (__glibc_likely (dup_idx != -1)) |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1616 { |
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1617 dfa->nodes[dup_idx].constraint = constraint; |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1618 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1619 dfa->nodes[dup_idx].duplicated = 1; |
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1620 |
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1621 /* Store the index of the original node. */ |
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1622 dfa->org_indices[dup_idx] = org_idx; |
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1623 } |
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
1624 return dup_idx; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1625 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1626 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1627 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1628 calc_inveclosure (re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1629 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1630 Idx src, idx; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1631 bool ok; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1632 for (idx = 0; idx < dfa->nodes_len; ++idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1633 re_node_set_init_empty (dfa->inveclosures + idx); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1634 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1635 for (src = 0; src < dfa->nodes_len; ++src) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1636 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1637 Idx *elems = dfa->eclosures[src].elems; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1638 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1639 { |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1640 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); |
39918 | 1641 if (__glibc_unlikely (! ok)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1642 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1643 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1644 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1645 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1646 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1647 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1648 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1649 /* Calculate "eclosure" for all the node in DFA. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1650 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1651 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1652 calc_eclosure (re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1653 { |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1654 Idx node_idx; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1655 bool incomplete; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1656 #ifdef DEBUG |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1657 assert (dfa->nodes_len > 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1658 #endif |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1659 incomplete = false; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1660 /* For each nodes, calculate epsilon closure. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1661 for (node_idx = 0; ; ++node_idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1662 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1663 reg_errcode_t err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1664 re_node_set eclosure_elem; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1665 if (node_idx == dfa->nodes_len) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1666 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1667 if (!incomplete) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1668 break; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1669 incomplete = false; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1670 node_idx = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1671 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1672 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1673 #ifdef DEBUG |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
1674 assert (dfa->eclosures[node_idx].nelem != -1); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1675 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1676 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1677 /* If we have already calculated, skip it. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1678 if (dfa->eclosures[node_idx].nelem != 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1679 continue; |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
1680 /* Calculate epsilon closure of 'node_idx'. */ |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1681 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true); |
39918 | 1682 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1683 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1684 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1685 if (dfa->eclosures[node_idx].nelem == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1686 { |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1687 incomplete = true; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1688 re_node_set_free (&eclosure_elem); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1689 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1690 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1691 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1692 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1693 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1694 /* Calculate epsilon closure of NODE. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1695 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1696 static reg_errcode_t |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1697 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1698 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1699 reg_errcode_t err; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1700 Idx i; |
12831
00cfc5186819
regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents:
12830
diff
changeset
|
1701 re_node_set eclosure; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1702 bool ok; |
12831
00cfc5186819
regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents:
12830
diff
changeset
|
1703 bool incomplete = false; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1704 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); |
39918 | 1705 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1706 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1707 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1708 /* This indicates that we are calculating this node now. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1709 We reference this value to avoid infinite loop. */ |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
1710 dfa->eclosures[node].nelem = -1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1711 |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1712 /* If the current node has constraints, duplicate all nodes |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1713 since they must inherit the constraints. */ |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1714 if (dfa->nodes[node].constraint |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1715 && dfa->edests[node].nelem |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1716 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1717 { |
10075
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1718 err = duplicate_node_closure (dfa, node, node, node, |
cc7bfc9f7fc1
optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents:
9494
diff
changeset
|
1719 dfa->nodes[node].constraint); |
39918 | 1720 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1721 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1722 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1723 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1724 /* Expand each epsilon destination nodes. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1725 if (IS_EPSILON_NODE(dfa->nodes[node].type)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1726 for (i = 0; i < dfa->edests[node].nelem; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1727 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1728 re_node_set eclosure_elem; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
1729 Idx edest = dfa->edests[node].elems[i]; |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
1730 /* If calculating the epsilon closure of 'edest' is in progress, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1731 return intermediate result. */ |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
1732 if (dfa->eclosures[edest].nelem == -1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1733 { |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1734 incomplete = true; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1735 continue; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1736 } |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
1737 /* If we haven't calculated the epsilon closure of 'edest' yet, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1738 calculate now. Otherwise use calculated epsilon closure. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1739 if (dfa->eclosures[edest].nelem == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1740 { |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1741 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false); |
39918 | 1742 if (__glibc_unlikely (err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1743 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1744 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1745 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1746 eclosure_elem = dfa->eclosures[edest]; |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
1747 /* Merge the epsilon closure of 'edest'. */ |
12829
971957a253f8
regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents:
12572
diff
changeset
|
1748 err = re_node_set_merge (&eclosure, &eclosure_elem); |
39918 | 1749 if (__glibc_unlikely (err != REG_NOERROR)) |
12829
971957a253f8
regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents:
12572
diff
changeset
|
1750 return err; |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
1751 /* If the epsilon closure of 'edest' is incomplete, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1752 the epsilon closure of this node is also incomplete. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1753 if (dfa->eclosures[edest].nelem == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1754 { |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1755 incomplete = true; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1756 re_node_set_free (&eclosure_elem); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1757 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1758 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1759 |
12831
00cfc5186819
regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents:
12830
diff
changeset
|
1760 /* An epsilon closure includes itself. */ |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1761 ok = re_node_set_insert (&eclosure, node); |
39918 | 1762 if (__glibc_unlikely (! ok)) |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
1763 return REG_ESPACE; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1764 if (incomplete && !root) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1765 dfa->eclosures[node].nelem = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1766 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1767 dfa->eclosures[node] = eclosure; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1768 *new_set = eclosure; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1769 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1770 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1771 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1772 /* Functions for tokens, which are used in the parser. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1773 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1774 /* Fetch a token from INPUT. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1775 We must not use this function inside bracket expressions. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1776 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1777 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1778 fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1779 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1780 re_string_skip_bytes (input, peek_token (result, input, syntax)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1781 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1782 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1783 /* Peek a token from INPUT, and return the length of the token. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1784 We must not use this function inside bracket expressions. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1785 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1786 static int |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1787 peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1788 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1789 unsigned char c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1790 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1791 if (re_string_eoi (input)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1792 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1793 token->type = END_OF_RE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1794 return 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1795 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1796 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1797 c = re_string_peek_byte (input, 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1798 token->opr.c = c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1799 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1800 token->word_char = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1801 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1802 token->mb_partial = 0; |
40245 | 1803 if (input->mb_cur_max > 1 |
1804 && !re_string_first_byte (input, re_string_cur_idx (input))) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1805 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1806 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1807 token->mb_partial = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1808 return 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1809 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1810 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1811 if (c == '\\') |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1812 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1813 unsigned char c2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1814 if (re_string_cur_idx (input) + 1 >= re_string_length (input)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1815 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1816 token->type = BACK_SLASH; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1817 return 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1818 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1819 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1820 c2 = re_string_peek_byte_case (input, 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1821 token->opr.c = c2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1822 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1823 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1824 if (input->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1825 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1826 wint_t wc = re_string_wchar_at (input, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1827 re_string_cur_idx (input) + 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1828 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1829 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1830 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1831 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1832 token->word_char = IS_WORD_CHAR (c2) != 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1833 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1834 switch (c2) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1835 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1836 case '|': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1837 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1838 token->type = OP_ALT; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1839 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1840 case '1': case '2': case '3': case '4': case '5': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1841 case '6': case '7': case '8': case '9': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1842 if (!(syntax & RE_NO_BK_REFS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1843 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1844 token->type = OP_BACK_REF; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1845 token->opr.idx = c2 - '1'; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1846 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1847 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1848 case '<': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1849 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1850 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1851 token->type = ANCHOR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1852 token->opr.ctx_type = WORD_FIRST; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1853 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1854 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1855 case '>': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1856 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1857 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1858 token->type = ANCHOR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1859 token->opr.ctx_type = WORD_LAST; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1860 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1861 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1862 case 'b': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1863 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1864 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1865 token->type = ANCHOR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1866 token->opr.ctx_type = WORD_DELIM; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1867 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1868 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1869 case 'B': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1870 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1871 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1872 token->type = ANCHOR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1873 token->opr.ctx_type = NOT_WORD_DELIM; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1874 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1875 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1876 case 'w': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1877 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1878 token->type = OP_WORD; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1879 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1880 case 'W': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1881 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1882 token->type = OP_NOTWORD; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1883 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1884 case 's': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1885 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1886 token->type = OP_SPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1887 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1888 case 'S': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1889 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1890 token->type = OP_NOTSPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1891 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1892 case '`': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1893 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1894 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1895 token->type = ANCHOR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1896 token->opr.ctx_type = BUF_FIRST; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1897 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1898 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1899 case '\'': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1900 if (!(syntax & RE_NO_GNU_OPS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1901 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1902 token->type = ANCHOR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1903 token->opr.ctx_type = BUF_LAST; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1904 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1905 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1906 case '(': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1907 if (!(syntax & RE_NO_BK_PARENS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1908 token->type = OP_OPEN_SUBEXP; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1909 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1910 case ')': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1911 if (!(syntax & RE_NO_BK_PARENS)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1912 token->type = OP_CLOSE_SUBEXP; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1913 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1914 case '+': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1915 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1916 token->type = OP_DUP_PLUS; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1917 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1918 case '?': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1919 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1920 token->type = OP_DUP_QUESTION; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1921 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1922 case '{': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1923 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1924 token->type = OP_OPEN_DUP_NUM; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1925 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1926 case '}': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1927 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1928 token->type = OP_CLOSE_DUP_NUM; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1929 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1930 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1931 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1932 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1933 return 2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1934 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1935 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1936 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1937 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1938 if (input->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1939 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1940 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1941 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1942 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1943 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1944 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1945 token->word_char = IS_WORD_CHAR (token->opr.c); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1946 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1947 switch (c) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1948 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1949 case '\n': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1950 if (syntax & RE_NEWLINE_ALT) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1951 token->type = OP_ALT; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1952 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1953 case '|': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1954 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1955 token->type = OP_ALT; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1956 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1957 case '*': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1958 token->type = OP_DUP_ASTERISK; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1959 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1960 case '+': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1961 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1962 token->type = OP_DUP_PLUS; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1963 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1964 case '?': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1965 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1966 token->type = OP_DUP_QUESTION; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1967 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1968 case '{': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1969 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1970 token->type = OP_OPEN_DUP_NUM; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1971 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1972 case '}': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1973 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1974 token->type = OP_CLOSE_DUP_NUM; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1975 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1976 case '(': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1977 if (syntax & RE_NO_BK_PARENS) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1978 token->type = OP_OPEN_SUBEXP; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1979 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1980 case ')': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1981 if (syntax & RE_NO_BK_PARENS) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1982 token->type = OP_CLOSE_SUBEXP; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1983 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1984 case '[': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1985 token->type = OP_OPEN_BRACKET; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1986 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1987 case '.': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1988 token->type = OP_PERIOD; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1989 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1990 case '^': |
40245 | 1991 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) |
1992 && re_string_cur_idx (input) != 0) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1993 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1994 char prev = re_string_peek_byte (input, -1); |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
1995 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1996 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1997 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1998 token->type = ANCHOR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1999 token->opr.ctx_type = LINE_FIRST; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2000 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2001 case '$': |
40245 | 2002 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) |
2003 && re_string_cur_idx (input) + 1 != re_string_length (input)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2004 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2005 re_token_t next; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2006 re_string_skip_bytes (input, 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2007 peek_token (&next, input, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2008 re_string_skip_bytes (input, -1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2009 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2010 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2011 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2012 token->type = ANCHOR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2013 token->opr.ctx_type = LINE_LAST; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2014 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2015 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2016 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2017 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2018 return 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2019 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2020 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2021 /* Peek a token from INPUT, and return the length of the token. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2022 We must not use this function outside of bracket expressions. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2023 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2024 static int |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2025 peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2026 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2027 unsigned char c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2028 if (re_string_eoi (input)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2029 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2030 token->type = END_OF_RE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2031 return 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2032 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2033 c = re_string_peek_byte (input, 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2034 token->opr.c = c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2035 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2036 #ifdef RE_ENABLE_I18N |
40245 | 2037 if (input->mb_cur_max > 1 |
2038 && !re_string_first_byte (input, re_string_cur_idx (input))) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2039 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2040 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2041 return 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2042 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2043 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2044 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2045 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2046 && re_string_cur_idx (input) + 1 < re_string_length (input)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2047 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2048 /* In this case, '\' escape a character. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2049 unsigned char c2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2050 re_string_skip_bytes (input, 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2051 c2 = re_string_peek_byte (input, 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2052 token->opr.c = c2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2053 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2054 return 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2055 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2056 if (c == '[') /* '[' is a special char in a bracket exps. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2057 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2058 unsigned char c2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2059 int token_len; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2060 if (re_string_cur_idx (input) + 1 < re_string_length (input)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2061 c2 = re_string_peek_byte (input, 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2062 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2063 c2 = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2064 token->opr.c = c2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2065 token_len = 2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2066 switch (c2) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2067 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2068 case '.': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2069 token->type = OP_OPEN_COLL_ELEM; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2070 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2071 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2072 case '=': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2073 token->type = OP_OPEN_EQUIV_CLASS; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2074 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2075 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2076 case ':': |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2077 if (syntax & RE_CHAR_CLASSES) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2078 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2079 token->type = OP_OPEN_CHAR_CLASS; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2080 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2081 } |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2082 FALLTHROUGH; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2083 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2084 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2085 token->opr.c = c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2086 token_len = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2087 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2088 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2089 return token_len; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2090 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2091 switch (c) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2092 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2093 case '-': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2094 token->type = OP_CHARSET_RANGE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2095 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2096 case ']': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2097 token->type = OP_CLOSE_BRACKET; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2098 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2099 case '^': |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2100 token->type = OP_NON_MATCH_LIST; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2101 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2102 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2103 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2104 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2105 return 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2106 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2107 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2108 /* Functions for parser. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2109 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2110 /* Entry point of the parser. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2111 Parse the regular expression REGEXP and return the structure tree. |
16358 | 2112 If an error occurs, ERR is set by error code, and return NULL. |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2113 This function build the following tree, from regular expression <reg_exp>: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2114 CAT |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2115 / \ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2116 / \ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2117 <reg_exp> EOR |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2118 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2119 CAT means concatenation. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2120 EOR means end of regular expression. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2121 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2122 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2123 parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2124 reg_errcode_t *err) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2125 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
2126 re_dfa_t *dfa = preg->buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2127 bin_tree_t *tree, *eor, *root; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2128 re_token_t current_token; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2129 dfa->syntax = syntax; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2130 fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2131 tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); |
39918 | 2132 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2133 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2134 eor = create_tree (dfa, NULL, NULL, END_OF_RE); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2135 if (tree != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2136 root = create_tree (dfa, tree, eor, CONCAT); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2137 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2138 root = eor; |
39918 | 2139 if (__glibc_unlikely (eor == NULL || root == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2140 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2141 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2142 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2143 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2144 return root; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2145 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2146 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2147 /* This function build the following tree, from regular expression |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2148 <branch1>|<branch2>: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2149 ALT |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2150 / \ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2151 / \ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2152 <branch1> <branch2> |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2153 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
2154 ALT means alternative, which represents the operator '|'. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2155 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2156 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2157 parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2158 reg_syntax_t syntax, Idx nest, reg_errcode_t *err) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2159 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
2160 re_dfa_t *dfa = preg->buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2161 bin_tree_t *tree, *branch = NULL; |
18096 | 2162 bitset_word_t initial_bkref_map = dfa->completed_bkref_map; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2163 tree = parse_branch (regexp, preg, token, syntax, nest, err); |
39918 | 2164 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2165 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2166 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2167 while (token->type == OP_ALT) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2168 { |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2169 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2170 if (token->type != OP_ALT && token->type != END_OF_RE |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2171 && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2172 { |
18096 | 2173 bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map; |
2174 dfa->completed_bkref_map = initial_bkref_map; | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2175 branch = parse_branch (regexp, preg, token, syntax, nest, err); |
39918 | 2176 if (__glibc_unlikely (*err != REG_NOERROR && branch == NULL)) |
17723
9edabe80a556
regex: fix memory leak in compiler
Paul Eggert <eggert@cs.ucla.edu>
parents:
17710
diff
changeset
|
2177 { |
9edabe80a556
regex: fix memory leak in compiler
Paul Eggert <eggert@cs.ucla.edu>
parents:
17710
diff
changeset
|
2178 if (tree != NULL) |
9edabe80a556
regex: fix memory leak in compiler
Paul Eggert <eggert@cs.ucla.edu>
parents:
17710
diff
changeset
|
2179 postorder (tree, free_tree, NULL); |
9edabe80a556
regex: fix memory leak in compiler
Paul Eggert <eggert@cs.ucla.edu>
parents:
17710
diff
changeset
|
2180 return NULL; |
9edabe80a556
regex: fix memory leak in compiler
Paul Eggert <eggert@cs.ucla.edu>
parents:
17710
diff
changeset
|
2181 } |
18096 | 2182 dfa->completed_bkref_map |= accumulated_bkref_map; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2183 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2184 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2185 branch = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2186 tree = create_tree (dfa, tree, branch, OP_ALT); |
39918 | 2187 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2188 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2189 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2190 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2191 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2192 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2193 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2194 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2195 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2196 /* This function build the following tree, from regular expression |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2197 <exp1><exp2>: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2198 CAT |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2199 / \ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2200 / \ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2201 <exp1> <exp2> |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2202 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2203 CAT means concatenation. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2204 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2205 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2206 parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2207 reg_syntax_t syntax, Idx nest, reg_errcode_t *err) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2208 { |
7694
c818925b8298
* lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents:
6733
diff
changeset
|
2209 bin_tree_t *tree, *expr; |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
2210 re_dfa_t *dfa = preg->buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2211 tree = parse_expression (regexp, preg, token, syntax, nest, err); |
39918 | 2212 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2213 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2214 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2215 while (token->type != OP_ALT && token->type != END_OF_RE |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2216 && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2217 { |
7694
c818925b8298
* lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents:
6733
diff
changeset
|
2218 expr = parse_expression (regexp, preg, token, syntax, nest, err); |
39918 | 2219 if (__glibc_unlikely (*err != REG_NOERROR && expr == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2220 { |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2221 if (tree != NULL) |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2222 postorder (tree, free_tree, NULL); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2223 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2224 } |
7694
c818925b8298
* lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents:
6733
diff
changeset
|
2225 if (tree != NULL && expr != NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2226 { |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2227 bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT); |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2228 if (newtree == NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2229 { |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2230 postorder (expr, free_tree, NULL); |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2231 postorder (tree, free_tree, NULL); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2232 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2233 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2234 } |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2235 tree = newtree; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2236 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2237 else if (tree == NULL) |
7694
c818925b8298
* lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents:
6733
diff
changeset
|
2238 tree = expr; |
c818925b8298
* lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents:
6733
diff
changeset
|
2239 /* Otherwise expr == NULL, we don't need to create new tree. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2240 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2241 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2242 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2243 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2244 /* This function builds the following tree, from regular expression a*: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2245 * |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2246 | |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2247 a |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2248 */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2249 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2250 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2251 parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2252 reg_syntax_t syntax, Idx nest, reg_errcode_t *err) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2253 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
2254 re_dfa_t *dfa = preg->buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2255 bin_tree_t *tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2256 switch (token->type) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2257 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2258 case CHARACTER: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2259 tree = create_token_tree (dfa, NULL, NULL, token); |
39918 | 2260 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2261 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2262 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2263 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2264 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2265 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2266 if (dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2267 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2268 while (!re_string_eoi (regexp) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2269 && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2270 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2271 bin_tree_t *mbc_remain; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2272 fetch_token (token, regexp, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2273 mbc_remain = create_token_tree (dfa, NULL, NULL, token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2274 tree = create_tree (dfa, tree, mbc_remain, CONCAT); |
39918 | 2275 if (__glibc_unlikely (mbc_remain == NULL || tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2276 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2277 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2278 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2279 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2280 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2281 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2282 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2283 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2284 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2285 case OP_OPEN_SUBEXP: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2286 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); |
39918 | 2287 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2288 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2289 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2290 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2291 case OP_OPEN_BRACKET: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2292 tree = parse_bracket_exp (regexp, dfa, token, syntax, err); |
39918 | 2293 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2294 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2295 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2296 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2297 case OP_BACK_REF: |
39918 | 2298 if (!__glibc_likely (dfa->completed_bkref_map & (1 << token->opr.idx))) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2299 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2300 *err = REG_ESUBREG; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2301 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2302 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2303 dfa->used_bkref_map |= 1 << token->opr.idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2304 tree = create_token_tree (dfa, NULL, NULL, token); |
39918 | 2305 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2306 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2307 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2308 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2309 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2310 ++dfa->nbackref; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2311 dfa->has_mb_node = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2312 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2313 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2314 case OP_OPEN_DUP_NUM: |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2315 if (syntax & RE_CONTEXT_INVALID_DUP) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2316 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2317 *err = REG_BADRPT; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2318 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2319 } |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2320 FALLTHROUGH; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2321 case OP_DUP_ASTERISK: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2322 case OP_DUP_PLUS: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2323 case OP_DUP_QUESTION: |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2324 if (syntax & RE_CONTEXT_INVALID_OPS) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2325 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2326 *err = REG_BADRPT; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2327 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2328 } |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2329 else if (syntax & RE_CONTEXT_INDEP_OPS) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2330 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2331 fetch_token (token, regexp, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2332 return parse_expression (regexp, preg, token, syntax, nest, err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2333 } |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2334 FALLTHROUGH; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2335 case OP_CLOSE_SUBEXP: |
40245 | 2336 if ((token->type == OP_CLOSE_SUBEXP) |
2337 && !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2338 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2339 *err = REG_ERPAREN; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2340 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2341 } |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2342 FALLTHROUGH; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2343 case OP_CLOSE_DUP_NUM: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2344 /* We treat it as a normal character. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2345 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2346 /* Then we can treat these characters as normal characters. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2347 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2348 /* mb_partial and word_char bits should be initialized already |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2349 by peek_token. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2350 tree = create_token_tree (dfa, NULL, NULL, token); |
39918 | 2351 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2352 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2353 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2354 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2355 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2356 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2357 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2358 case ANCHOR: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2359 if ((token->opr.ctx_type |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2360 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2361 && dfa->word_ops_used == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2362 init_word_char (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2363 if (token->opr.ctx_type == WORD_DELIM |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2364 || token->opr.ctx_type == NOT_WORD_DELIM) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2365 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2366 bin_tree_t *tree_first, *tree_last; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2367 if (token->opr.ctx_type == WORD_DELIM) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2368 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2369 token->opr.ctx_type = WORD_FIRST; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2370 tree_first = create_token_tree (dfa, NULL, NULL, token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2371 token->opr.ctx_type = WORD_LAST; |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2372 } |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2373 else |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2374 { |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2375 token->opr.ctx_type = INSIDE_WORD; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2376 tree_first = create_token_tree (dfa, NULL, NULL, token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2377 token->opr.ctx_type = INSIDE_NOTWORD; |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2378 } |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2379 tree_last = create_token_tree (dfa, NULL, NULL, token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2380 tree = create_tree (dfa, tree_first, tree_last, OP_ALT); |
39918 | 2381 if (__glibc_unlikely (tree_first == NULL || tree_last == NULL |
2382 || tree == NULL)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2383 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2384 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2385 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2386 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2387 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2388 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2389 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2390 tree = create_token_tree (dfa, NULL, NULL, token); |
39918 | 2391 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2392 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2393 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2394 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2395 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2396 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2397 /* We must return here, since ANCHORs can't be followed |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2398 by repetition operators. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2399 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>", |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2400 it must not be "<ANCHOR(^)><REPEAT(*)>". */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2401 fetch_token (token, regexp, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2402 return tree; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2403 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2404 case OP_PERIOD: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2405 tree = create_token_tree (dfa, NULL, NULL, token); |
39918 | 2406 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2407 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2408 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2409 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2410 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2411 if (dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2412 dfa->has_mb_node = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2413 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2414 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2415 case OP_WORD: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2416 case OP_NOTWORD: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2417 tree = build_charclass_op (dfa, regexp->trans, |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
2418 "alnum", |
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
2419 "_", |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2420 token->type == OP_NOTWORD, err); |
39918 | 2421 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2422 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2423 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2424 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2425 case OP_SPACE: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2426 case OP_NOTSPACE: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2427 tree = build_charclass_op (dfa, regexp->trans, |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
2428 "space", |
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
2429 "", |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2430 token->type == OP_NOTSPACE, err); |
39918 | 2431 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2432 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2433 break; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2434 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2435 case OP_ALT: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2436 case END_OF_RE: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2437 return NULL; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2438 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2439 case BACK_SLASH: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2440 *err = REG_EESCAPE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2441 return NULL; |
19013
fa6b743e021c
regex: work with GCC7's -Werror=implicit-fallthrough=
Paul Eggert <eggert@cs.ucla.edu>
parents:
18626
diff
changeset
|
2442 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2443 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2444 /* Must not happen? */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2445 #ifdef DEBUG |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2446 assert (0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2447 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2448 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2449 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2450 fetch_token (token, regexp, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2451 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2452 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2453 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2454 { |
17710
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2455 bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token, |
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2456 syntax, err); |
39918 | 2457 if (__glibc_unlikely (*err != REG_NOERROR && dup_tree == NULL)) |
17710
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2458 { |
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2459 if (tree != NULL) |
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2460 postorder (tree, free_tree, NULL); |
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2461 return NULL; |
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2462 } |
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2463 tree = dup_tree; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2464 /* In BRE consecutive duplications are not allowed. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2465 if ((syntax & RE_CONTEXT_INVALID_DUP) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2466 && (token->type == OP_DUP_ASTERISK |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2467 || token->type == OP_OPEN_DUP_NUM)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2468 { |
17710
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2469 if (tree != NULL) |
fe5bf4d5ee95
regex: fix memory leak in compiler
Paul Eggert <eggert@penguin.cs.ucla.edu>
parents:
17576
diff
changeset
|
2470 postorder (tree, free_tree, NULL); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2471 *err = REG_BADRPT; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2472 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2473 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2474 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2475 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2476 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2477 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2478 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2479 /* This function build the following tree, from regular expression |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2480 (<reg_exp>): |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2481 SUBEXP |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2482 | |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2483 <reg_exp> |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2484 */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2485 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2486 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2487 parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2488 reg_syntax_t syntax, Idx nest, reg_errcode_t *err) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2489 { |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16730
diff
changeset
|
2490 re_dfa_t *dfa = preg->buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2491 bin_tree_t *tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2492 size_t cur_nsub; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2493 cur_nsub = preg->re_nsub++; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2494 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2495 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2496 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2497 /* The subexpression may be a null string. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2498 if (token->type == OP_CLOSE_SUBEXP) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2499 tree = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2500 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2501 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2502 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); |
39918 | 2503 if (__glibc_unlikely (*err == REG_NOERROR |
2504 && token->type != OP_CLOSE_SUBEXP)) | |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2505 { |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2506 if (tree != NULL) |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2507 postorder (tree, free_tree, NULL); |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2508 *err = REG_EPAREN; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2509 } |
39918 | 2510 if (__glibc_unlikely (*err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2511 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2512 } |
6171
5862ee08bfc1
* lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents:
6170
diff
changeset
|
2513 |
5862ee08bfc1
* lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents:
6170
diff
changeset
|
2514 if (cur_nsub <= '9' - '1') |
5862ee08bfc1
* lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents:
6170
diff
changeset
|
2515 dfa->completed_bkref_map |= 1 << cur_nsub; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2516 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2517 tree = create_tree (dfa, tree, NULL, SUBEXP); |
39918 | 2518 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2519 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2520 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2521 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2522 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2523 tree->token.opr.idx = cur_nsub; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2524 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2525 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2526 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2527 /* This function parses repetition operators like "*", "+", "{1,3}" etc. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2528 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2529 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2530 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2531 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2532 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2533 bin_tree_t *tree = NULL, *old_tree = NULL; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2534 Idx i, start, end, start_idx = re_string_cur_idx (regexp); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2535 re_token_t start_token = *token; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2536 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2537 if (token->type == OP_OPEN_DUP_NUM) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2538 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2539 end = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2540 start = fetch_number (regexp, token, syntax); |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
2541 if (start == -1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2542 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2543 if (token->type == CHARACTER && token->opr.c == ',') |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2544 start = 0; /* We treat "{,m}" as "{0,m}". */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2545 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2546 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2547 *err = REG_BADBR; /* <re>{} is invalid. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2548 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2549 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2550 } |
39918 | 2551 if (__glibc_likely (start != -2)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2552 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2553 /* We treat "{n}" as "{n,n}". */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2554 end = ((token->type == OP_CLOSE_DUP_NUM) ? start |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2555 : ((token->type == CHARACTER && token->opr.c == ',') |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
2556 ? fetch_number (regexp, token, syntax) : -2)); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2557 } |
39918 | 2558 if (__glibc_unlikely (start == -2 || end == -2)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2559 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2560 /* Invalid sequence. */ |
39918 | 2561 if (__glibc_unlikely (!(syntax & RE_INVALID_INTERVAL_ORD))) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2562 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2563 if (token->type == END_OF_RE) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2564 *err = REG_EBRACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2565 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2566 *err = REG_BADBR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2567 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2568 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2569 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2570 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2571 /* If the syntax bit is set, rollback. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2572 re_string_set_index (regexp, start_idx); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2573 *token = start_token; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2574 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2575 /* mb_partial and word_char bits should be already initialized by |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2576 peek_token. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2577 return elem; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2578 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2579 |
39918 | 2580 if (__glibc_unlikely ((end != -1 && start > end) |
2581 || token->type != OP_CLOSE_DUP_NUM)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2582 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2583 /* First number greater than second. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2584 *err = REG_BADBR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2585 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2586 } |
16705
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
2587 |
39918 | 2588 if (__glibc_unlikely (RE_DUP_MAX < (end == -1 ? start : end))) |
16705
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
2589 { |
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
2590 *err = REG_ESIZE; |
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
2591 return NULL; |
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
2592 } |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2593 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2594 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2595 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2596 start = (token->type == OP_DUP_PLUS) ? 1 : 0; |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
2597 end = (token->type == OP_DUP_QUESTION) ? 1 : -1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2598 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2599 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2600 fetch_token (token, regexp, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2601 |
39918 | 2602 if (__glibc_unlikely (elem == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2603 return NULL; |
39918 | 2604 if (__glibc_unlikely (start == 0 && end == 0)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2605 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2606 postorder (elem, free_tree, NULL); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2607 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2608 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2609 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2610 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ |
39918 | 2611 if (__glibc_unlikely (start > 0)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2612 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2613 tree = elem; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2614 for (i = 2; i <= start; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2615 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2616 elem = duplicate_tree (elem, dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2617 tree = create_tree (dfa, tree, elem, CONCAT); |
39918 | 2618 if (__glibc_unlikely (elem == NULL || tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2619 goto parse_dup_op_espace; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2620 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2621 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2622 if (start == end) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2623 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2624 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2625 /* Duplicate ELEM before it is marked optional. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2626 elem = duplicate_tree (elem, dfa); |
39918 | 2627 if (__glibc_unlikely (elem == NULL)) |
17725
d65323023063
regex: don't deref NULL upon heap allocation failure
Jim Meyering <meyering@fb.com>
parents:
17723
diff
changeset
|
2628 goto parse_dup_op_espace; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2629 old_tree = tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2630 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2631 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2632 old_tree = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2633 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2634 if (elem->token.type == SUBEXP) |
16912
1591c84dbb2d
regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents:
16882
diff
changeset
|
2635 { |
1591c84dbb2d
regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents:
16882
diff
changeset
|
2636 uintptr_t subidx = elem->token.opr.idx; |
1591c84dbb2d
regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents:
16882
diff
changeset
|
2637 postorder (elem, mark_opt_subexp, (void *) subidx); |
1591c84dbb2d
regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents:
16882
diff
changeset
|
2638 } |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2639 |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2640 tree = create_tree (dfa, elem, NULL, |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
2641 (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); |
39918 | 2642 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2643 goto parse_dup_op_espace; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2644 |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
2645 /* This loop is actually executed only when end != -1, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2646 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2647 already created the start+1-th copy. */ |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
2648 if (TYPE_SIGNED (Idx) || end != -1) |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2649 for (i = start + 2; i <= end; ++i) |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2650 { |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2651 elem = duplicate_tree (elem, dfa); |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2652 tree = create_tree (dfa, tree, elem, CONCAT); |
39918 | 2653 if (__glibc_unlikely (elem == NULL || tree == NULL)) |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2654 goto parse_dup_op_espace; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2655 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2656 tree = create_tree (dfa, tree, NULL, OP_ALT); |
39918 | 2657 if (__glibc_unlikely (tree == NULL)) |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2658 goto parse_dup_op_espace; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2659 } |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2660 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2661 if (old_tree) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2662 tree = create_tree (dfa, old_tree, tree, CONCAT); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2663 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2664 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2665 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2666 parse_dup_op_espace: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2667 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2668 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2669 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2670 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2671 /* Size of the names for collating symbol/equivalence_class/character_class. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2672 I'm not sure, but maybe enough. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2673 #define BRACKET_NAME_BUF_SIZE 32 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2674 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2675 #ifndef _LIBC |
18228
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2676 |
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2677 # ifdef RE_ENABLE_I18N |
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2678 /* Convert the byte B to the corresponding wide character. In a |
39779 | 2679 unibyte locale, treat B as itself. In a multibyte locale, return |
2680 WEOF if B is an encoding error. */ | |
18228
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2681 static wint_t |
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2682 parse_byte (unsigned char b, re_charset_t *mbcset) |
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2683 { |
39779 | 2684 return mbcset == NULL ? b : __btowc (b); |
18228
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2685 } |
39779 | 2686 # endif |
18228
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2687 |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2688 /* Local function for parse_bracket_exp only used in case of NOT _LIBC. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2689 Build the range expression which starts from START_ELEM, and ends |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2690 at END_ELEM. The results are written to MBCSET and SBCSET. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2691 RANGE_ALLOC is the allocated size of mbcset->range_starts and |
16358 | 2692 mbcset->range_ends; it is a pointer argument since we may |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2693 update it. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2694 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2695 static reg_errcode_t |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2696 # ifdef RE_ENABLE_I18N |
13014
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2697 build_range_exp (const reg_syntax_t syntax, |
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2698 bitset_t sbcset, |
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2699 re_charset_t *mbcset, |
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2700 Idx *range_alloc, |
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2701 const bracket_elem_t *start_elem, |
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2702 const bracket_elem_t *end_elem) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2703 # else /* not RE_ENABLE_I18N */ |
13014
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2704 build_range_exp (const reg_syntax_t syntax, |
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2705 bitset_t sbcset, |
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2706 const bracket_elem_t *start_elem, |
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
2707 const bracket_elem_t *end_elem) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2708 # endif /* not RE_ENABLE_I18N */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2709 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2710 unsigned int start_ch, end_ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2711 /* Equivalence Classes and Character Classes can't be a range start/end. */ |
39918 | 2712 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS |
2713 || start_elem->type == CHAR_CLASS | |
2714 || end_elem->type == EQUIV_CLASS | |
2715 || end_elem->type == CHAR_CLASS)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2716 return REG_ERANGE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2717 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2718 /* We can handle no multi character collating elements without libc |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2719 support. */ |
39918 | 2720 if (__glibc_unlikely ((start_elem->type == COLL_SYM |
2721 && strlen ((char *) start_elem->opr.name) > 1) | |
2722 || (end_elem->type == COLL_SYM | |
2723 && strlen ((char *) end_elem->opr.name) > 1))) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2724 return REG_ECOLLATE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2725 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2726 # ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2727 { |
5972
aa260da0bbbe
* config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents:
5968
diff
changeset
|
2728 wchar_t wc; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2729 wint_t start_wc; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2730 wint_t end_wc; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2731 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2732 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2733 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2734 : 0)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2735 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2736 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2737 : 0)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2738 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) |
18228
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2739 ? parse_byte (start_ch, mbcset) : start_elem->opr.wch); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2740 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) |
18228
a097fd3fd500
regex: treat [x] as x if x is a unibyte encoding error
Paul Eggert <eggert@cs.ucla.edu>
parents:
18219
diff
changeset
|
2741 ? parse_byte (end_ch, mbcset) : end_elem->opr.wch); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2742 if (start_wc == WEOF || end_wc == WEOF) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2743 return REG_ECOLLATE; |
39918 | 2744 else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) |
2745 && start_wc > end_wc)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2746 return REG_ERANGE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2747 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2748 /* Got valid collation sequence values, add them as a new entry. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2749 However, for !_LIBC we have no collation elements: if the |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2750 character set is single byte, the single byte character set |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2751 that we build below suffices. parse_bracket_exp passes |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2752 no MBCSET if dfa->mb_cur_max == 1. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2753 if (mbcset) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2754 { |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2755 /* Check the space of the arrays. */ |
39918 | 2756 if (__glibc_unlikely (*range_alloc == mbcset->nranges)) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2757 { |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2758 /* There is not enough space, need realloc. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2759 wchar_t *new_array_start, *new_array_end; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2760 Idx new_nranges; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2761 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2762 /* +1 in case of mbcset->nranges is 0. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2763 new_nranges = 2 * mbcset->nranges + 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2764 /* Use realloc since mbcset->range_starts and mbcset->range_ends |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2765 are NULL if *range_alloc == 0. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2766 new_array_start = re_realloc (mbcset->range_starts, wchar_t, |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2767 new_nranges); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2768 new_array_end = re_realloc (mbcset->range_ends, wchar_t, |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2769 new_nranges); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2770 |
39918 | 2771 if (__glibc_unlikely (new_array_start == NULL |
2772 || new_array_end == NULL)) | |
18217 | 2773 { |
2774 re_free (new_array_start); | |
2775 re_free (new_array_end); | |
2776 return REG_ESPACE; | |
2777 } | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2778 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2779 mbcset->range_starts = new_array_start; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2780 mbcset->range_ends = new_array_end; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2781 *range_alloc = new_nranges; |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2782 } |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2783 |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2784 mbcset->range_starts[mbcset->nranges] = start_wc; |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2785 mbcset->range_ends[mbcset->nranges++] = end_wc; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2786 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2787 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2788 /* Build the table for single byte characters. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2789 for (wc = 0; wc < SBC_MAX; ++wc) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2790 { |
17237
899138bc3a58
regex: implement rational ranges
Paul Eggert <eggert@cs.ucla.edu>
parents:
17234
diff
changeset
|
2791 if (start_wc <= wc && wc <= end_wc) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2792 bitset_set (sbcset, wc); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2793 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2794 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2795 # else /* not RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2796 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2797 unsigned int ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2798 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2799 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2800 : 0)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2801 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2802 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2803 : 0)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2804 if (start_ch > end_ch) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2805 return REG_ERANGE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2806 /* Build the table for single byte characters. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2807 for (ch = 0; ch < SBC_MAX; ++ch) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2808 if (start_ch <= ch && ch <= end_ch) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2809 bitset_set (sbcset, ch); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2810 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2811 # endif /* not RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2812 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2813 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2814 #endif /* not _LIBC */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2815 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2816 #ifndef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2817 /* Helper function for parse_bracket_exp, used only when _LIBC is not defined. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2818 Build the collating element which is represented by NAME. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2819 The results are written to MBCSET and SBCSET. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2820 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym; it is a |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2821 pointer argument since we may update it. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2822 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2823 static reg_errcode_t |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2824 # ifdef RE_ENABLE_I18N |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2825 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2826 Idx *coll_sym_alloc, const unsigned char *name) |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2827 # else /* not RE_ENABLE_I18N */ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2828 build_collating_symbol (bitset_t sbcset, const unsigned char *name) |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
2829 # endif /* not RE_ENABLE_I18N */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2830 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2831 size_t name_len = strlen ((const char *) name); |
39918 | 2832 if (__glibc_unlikely (name_len != 1)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2833 return REG_ECOLLATE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2834 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2835 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2836 bitset_set (sbcset, name[0]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2837 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2838 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2839 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2840 #endif /* not _LIBC */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2841 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2842 /* This function parses bracket expressions like "[abc]", "[a-c]", |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2843 "[[.a-a.]]" etc. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2844 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2845 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2846 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
2847 reg_syntax_t syntax, reg_errcode_t *err) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2848 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2849 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2850 const unsigned char *collseqmb; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2851 const char *collseqwc; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2852 uint32_t nrules; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2853 int32_t table_size; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2854 const int32_t *symb_table; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2855 const unsigned char *extra; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2856 |
16358 | 2857 /* Local function for parse_bracket_exp used in _LIBC environment. |
2858 Seek the collating symbol entry corresponding to NAME. | |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2859 Return the index of the symbol in the SYMB_TABLE, |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2860 or -1 if not found. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2861 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2862 auto inline int32_t |
17346
cd38818bce4e
regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents:
17338
diff
changeset
|
2863 __attribute__ ((always_inline)) |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2864 seek_collating_symbol_entry (const unsigned char *name, size_t name_len) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2865 { |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2866 int32_t elem; |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2867 |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2868 for (elem = 0; elem < table_size; elem++) |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2869 if (symb_table[2 * elem] != 0) |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2870 { |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2871 int32_t idx = symb_table[2 * elem + 1]; |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2872 /* Skip the name of collating element name. */ |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2873 idx += 1 + extra[idx]; |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2874 if (/* Compare the length of the name. */ |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2875 name_len == extra[idx] |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2876 /* Compare the name. */ |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2877 && memcmp (name, &extra[idx + 1], name_len) == 0) |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2878 /* Yep, this is the entry. */ |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2879 return elem; |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2880 } |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2881 return -1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2882 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2883 |
12571
64d47f001127
regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents:
12570
diff
changeset
|
2884 /* Local function for parse_bracket_exp used in _LIBC environment. |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2885 Look up the collation sequence value of BR_ELEM. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2886 Return the value if succeeded, UINT_MAX otherwise. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2887 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2888 auto inline unsigned int |
17346
cd38818bce4e
regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents:
17338
diff
changeset
|
2889 __attribute__ ((always_inline)) |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2890 lookup_collation_sequence_value (bracket_elem_t *br_elem) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2891 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2892 if (br_elem->type == SB_CHAR) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2893 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2894 /* |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2895 if (MB_CUR_MAX == 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2896 */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2897 if (nrules == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2898 return collseqmb[br_elem->opr.ch]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2899 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2900 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2901 wint_t wc = __btowc (br_elem->opr.ch); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2902 return __collseq_table_lookup (collseqwc, wc); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2903 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2904 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2905 else if (br_elem->type == MB_CHAR) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2906 { |
12571
64d47f001127
regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents:
12570
diff
changeset
|
2907 if (nrules != 0) |
64d47f001127
regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents:
12570
diff
changeset
|
2908 return __collseq_table_lookup (collseqwc, br_elem->opr.wch); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2909 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2910 else if (br_elem->type == COLL_SYM) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2911 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2912 size_t sym_name_len = strlen ((char *) br_elem->opr.name); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2913 if (nrules != 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2914 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2915 int32_t elem, idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2916 elem = seek_collating_symbol_entry (br_elem->opr.name, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2917 sym_name_len); |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2918 if (elem != -1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2919 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2920 /* We found the entry. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2921 idx = symb_table[2 * elem + 1]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2922 /* Skip the name of collating element name. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2923 idx += 1 + extra[idx]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2924 /* Skip the byte sequence of the collating element. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2925 idx += 1 + extra[idx]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2926 /* Adjust for the alignment. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2927 idx = (idx + 3) & ~3; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2928 /* Skip the multibyte collation sequence value. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2929 idx += sizeof (unsigned int); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2930 /* Skip the wide char sequence of the collating element. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2931 idx += sizeof (unsigned int) * |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2932 (1 + *(unsigned int *) (extra + idx)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2933 /* Return the collation sequence value. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2934 return *(unsigned int *) (extra + idx); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2935 } |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2936 else if (sym_name_len == 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2937 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2938 /* No valid character. Match it as a single byte |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2939 character. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2940 return collseqmb[br_elem->opr.name[0]]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2941 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2942 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2943 else if (sym_name_len == 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2944 return collseqmb[br_elem->opr.name[0]]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2945 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2946 return UINT_MAX; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2947 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2948 |
16358 | 2949 /* Local function for parse_bracket_exp used in _LIBC environment. |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2950 Build the range expression which starts from START_ELEM, and ends |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2951 at END_ELEM. The results are written to MBCSET and SBCSET. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2952 RANGE_ALLOC is the allocated size of mbcset->range_starts, and |
16358 | 2953 mbcset->range_ends; it is a pointer argument since we may |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2954 update it. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2955 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2956 auto inline reg_errcode_t |
17346
cd38818bce4e
regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents:
17338
diff
changeset
|
2957 __attribute__ ((always_inline)) |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2958 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
2959 bracket_elem_t *start_elem, bracket_elem_t *end_elem) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2960 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2961 unsigned int ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2962 uint32_t start_collseq; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2963 uint32_t end_collseq; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2964 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2965 /* Equivalence Classes and Character Classes can't be a range |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2966 start/end. */ |
39918 | 2967 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS |
2968 || start_elem->type == CHAR_CLASS | |
2969 || end_elem->type == EQUIV_CLASS | |
2970 || end_elem->type == CHAR_CLASS)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2971 return REG_ERANGE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2972 |
17237
899138bc3a58
regex: implement rational ranges
Paul Eggert <eggert@cs.ucla.edu>
parents:
17234
diff
changeset
|
2973 /* FIXME: Implement rational ranges here, too. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2974 start_collseq = lookup_collation_sequence_value (start_elem); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2975 end_collseq = lookup_collation_sequence_value (end_elem); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2976 /* Check start/end collation sequence values. */ |
39918 | 2977 if (__glibc_unlikely (start_collseq == UINT_MAX |
2978 || end_collseq == UINT_MAX)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2979 return REG_ECOLLATE; |
39918 | 2980 if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) |
2981 && start_collseq > end_collseq)) | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2982 return REG_ERANGE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2983 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2984 /* Got valid collation sequence values, add them as a new entry. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2985 However, if we have no collation elements, and the character set |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2986 is single byte, the single byte character set that we |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2987 build below suffices. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2988 if (nrules > 0 || dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2989 { |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
2990 /* Check the space of the arrays. */ |
39918 | 2991 if (__glibc_unlikely (*range_alloc == mbcset->nranges)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2992 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2993 /* There is not enough space, need realloc. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2994 uint32_t *new_array_start; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2995 uint32_t *new_array_end; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
2996 Idx new_nranges; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2997 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2998 /* +1 in case mbcset->nranges is 0. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
2999 new_nranges = 2 * mbcset->nranges + 1; |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3000 new_array_start = re_realloc (mbcset->range_starts, uint32_t, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3001 new_nranges); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3002 new_array_end = re_realloc (mbcset->range_ends, uint32_t, |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3003 new_nranges); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3004 |
39918 | 3005 if (__glibc_unlikely (new_array_start == NULL |
3006 || new_array_end == NULL)) | |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3007 return REG_ESPACE; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3008 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3009 mbcset->range_starts = new_array_start; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3010 mbcset->range_ends = new_array_end; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3011 *range_alloc = new_nranges; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3012 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3013 |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3014 mbcset->range_starts[mbcset->nranges] = start_collseq; |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3015 mbcset->range_ends[mbcset->nranges++] = end_collseq; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3016 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3017 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3018 /* Build the table for single byte characters. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3019 for (ch = 0; ch < SBC_MAX; ch++) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3020 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3021 uint32_t ch_collseq; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3022 /* |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3023 if (MB_CUR_MAX == 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3024 */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3025 if (nrules == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3026 ch_collseq = collseqmb[ch]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3027 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3028 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3029 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3030 bitset_set (sbcset, ch); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3031 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3032 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3033 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3034 |
16358 | 3035 /* Local function for parse_bracket_exp used in _LIBC environment. |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3036 Build the collating element which is represented by NAME. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3037 The results are written to MBCSET and SBCSET. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3038 COLL_SYM_ALLOC is the allocated size of mbcset->coll_syms; it is a |
16358 | 3039 pointer argument since we may update it. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3040 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3041 auto inline reg_errcode_t |
17346
cd38818bce4e
regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents:
17338
diff
changeset
|
3042 __attribute__ ((always_inline)) |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
3043 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, |
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
3044 Idx *coll_sym_alloc, const unsigned char *name) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3045 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3046 int32_t elem, idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3047 size_t name_len = strlen ((const char *) name); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3048 if (nrules != 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3049 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3050 elem = seek_collating_symbol_entry (name, name_len); |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
3051 if (elem != -1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3052 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3053 /* We found the entry. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3054 idx = symb_table[2 * elem + 1]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3055 /* Skip the name of the collating element. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3056 idx += 1 + extra[idx]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3057 } |
17338
1fff19279ff9
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17270
diff
changeset
|
3058 else if (name_len == 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3059 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3060 /* No valid character, treat it as a normal |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3061 character. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3062 bitset_set (sbcset, name[0]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3063 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3064 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3065 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3066 return REG_ECOLLATE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3067 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3068 /* Got valid collation sequence, add it as a new entry. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3069 /* Check the space of the arrays. */ |
39918 | 3070 if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3071 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3072 /* Not enough, realloc it. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3073 /* +1 in case mbcset->ncoll_syms is 0. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3074 Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3075 /* Use realloc since mbcset->coll_syms is NULL |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3076 if *alloc == 0. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3077 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3078 new_coll_sym_alloc); |
39918 | 3079 if (__glibc_unlikely (new_coll_syms == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3080 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3081 mbcset->coll_syms = new_coll_syms; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3082 *coll_sym_alloc = new_coll_sym_alloc; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3083 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3084 mbcset->coll_syms[mbcset->ncoll_syms++] = idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3085 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3086 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3087 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3088 { |
39918 | 3089 if (__glibc_unlikely (name_len != 1)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3090 return REG_ECOLLATE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3091 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3092 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3093 bitset_set (sbcset, name[0]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3094 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3095 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3096 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3097 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3098 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3099 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3100 re_token_t br_token; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3101 re_bitset_ptr_t sbcset; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3102 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3103 re_charset_t *mbcset; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
3104 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
3105 Idx equiv_class_alloc = 0, char_class_alloc = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3106 #endif /* RE_ENABLE_I18N */ |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3107 bool non_match = false; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3108 bin_tree_t *work_tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3109 int token_len; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3110 bool first_round = true; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3111 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3112 collseqmb = (const unsigned char *) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3113 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3114 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3115 if (nrules) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3116 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3117 /* |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3118 if (MB_CUR_MAX > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3119 */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3120 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3121 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3122 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3123 _NL_COLLATE_SYMB_TABLEMB); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3124 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3125 _NL_COLLATE_SYMB_EXTRAMB); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3126 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3127 #endif |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3128 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3129 #ifdef RE_ENABLE_I18N |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3130 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3131 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3132 #ifdef RE_ENABLE_I18N |
39918 | 3133 if (__glibc_unlikely (sbcset == NULL || mbcset == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3134 #else |
39918 | 3135 if (__glibc_unlikely (sbcset == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3136 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3137 { |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
3138 re_free (sbcset); |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
3139 #ifdef RE_ENABLE_I18N |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
3140 re_free (mbcset); |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
3141 #endif |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3142 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3143 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3144 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3145 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3146 token_len = peek_token_bracket (token, regexp, syntax); |
39918 | 3147 if (__glibc_unlikely (token->type == END_OF_RE)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3148 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3149 *err = REG_BADPAT; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3150 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3151 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3152 if (token->type == OP_NON_MATCH_LIST) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3153 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3154 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3155 mbcset->non_match = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3156 #endif /* RE_ENABLE_I18N */ |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3157 non_match = true; |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3158 if (syntax & RE_HAT_LISTS_NOT_NEWLINE) |
8110
8428e492271f
2007-02-05 Paolo Bonzini <bonzini@gnu.org>
Paolo Bonzini <bonzini@gnu.org>
parents:
8073
diff
changeset
|
3159 bitset_set (sbcset, '\n'); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3160 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3161 token_len = peek_token_bracket (token, regexp, syntax); |
39918 | 3162 if (__glibc_unlikely (token->type == END_OF_RE)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3163 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3164 *err = REG_BADPAT; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3165 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3166 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3167 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3168 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3169 /* We treat the first ']' as a normal character. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3170 if (token->type == OP_CLOSE_BRACKET) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3171 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3172 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3173 while (1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3174 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3175 bracket_elem_t start_elem, end_elem; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3176 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3177 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3178 reg_errcode_t ret; |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3179 int token_len2 = 0; |
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3180 bool is_range_exp = false; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3181 re_token_t token2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3182 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3183 start_elem.opr.name = start_name_buf; |
18093
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
3184 start_elem.type = COLL_SYM; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3185 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3186 syntax, first_round); |
39918 | 3187 if (__glibc_unlikely (ret != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3188 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3189 *err = ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3190 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3191 } |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3192 first_round = false; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3193 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3194 /* Get information about the next token. We need it in any case. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3195 token_len = peek_token_bracket (token, regexp, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3196 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3197 /* Do not check for ranges if we know they are not allowed. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3198 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3199 { |
39918 | 3200 if (__glibc_unlikely (token->type == END_OF_RE)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3201 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3202 *err = REG_EBRACK; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3203 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3204 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3205 if (token->type == OP_CHARSET_RANGE) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3206 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3207 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3208 token_len2 = peek_token_bracket (&token2, regexp, syntax); |
39918 | 3209 if (__glibc_unlikely (token2.type == END_OF_RE)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3210 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3211 *err = REG_EBRACK; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3212 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3213 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3214 if (token2.type == OP_CLOSE_BRACKET) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3215 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3216 /* We treat the last '-' as a normal character. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3217 re_string_skip_bytes (regexp, -token_len); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3218 token->type = CHARACTER; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3219 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3220 else |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3221 is_range_exp = true; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3222 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3223 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3224 |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3225 if (is_range_exp == true) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3226 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3227 end_elem.opr.name = end_name_buf; |
18093
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
3228 end_elem.type = COLL_SYM; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3229 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3230 dfa, syntax, true); |
39918 | 3231 if (__glibc_unlikely (ret != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3232 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3233 *err = ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3234 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3235 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3236 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3237 token_len = peek_token_bracket (token, regexp, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3238 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3239 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3240 *err = build_range_exp (sbcset, mbcset, &range_alloc, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3241 &start_elem, &end_elem); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3242 #else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3243 # ifdef RE_ENABLE_I18N |
13014
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
3244 *err = build_range_exp (syntax, sbcset, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3245 dfa->mb_cur_max > 1 ? mbcset : NULL, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3246 &range_alloc, &start_elem, &end_elem); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3247 # else |
13014
6240e99d3e0c
regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents:
12848
diff
changeset
|
3248 *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3249 # endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3250 #endif /* not _LIBC */ |
39918 | 3251 if (__glibc_unlikely (*err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3252 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3253 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3254 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3255 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3256 switch (start_elem.type) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3257 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3258 case SB_CHAR: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3259 bitset_set (sbcset, start_elem.opr.ch); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3260 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3261 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3262 case MB_CHAR: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3263 /* Check whether the array has enough space. */ |
39918 | 3264 if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3265 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3266 wchar_t *new_mbchars; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3267 /* Not enough, realloc it. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3268 /* +1 in case mbcset->nmbchars is 0. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3269 mbchar_alloc = 2 * mbcset->nmbchars + 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3270 /* Use realloc since array is NULL if *alloc == 0. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3271 new_mbchars = re_realloc (mbcset->mbchars, wchar_t, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3272 mbchar_alloc); |
39918 | 3273 if (__glibc_unlikely (new_mbchars == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3274 goto parse_bracket_exp_espace; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3275 mbcset->mbchars = new_mbchars; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3276 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3277 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3278 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3279 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3280 case EQUIV_CLASS: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3281 *err = build_equiv_class (sbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3282 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3283 mbcset, &equiv_class_alloc, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3284 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3285 start_elem.opr.name); |
39918 | 3286 if (__glibc_unlikely (*err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3287 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3288 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3289 case COLL_SYM: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3290 *err = build_collating_symbol (sbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3291 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3292 mbcset, &coll_sym_alloc, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3293 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3294 start_elem.opr.name); |
39918 | 3295 if (__glibc_unlikely (*err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3296 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3297 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3298 case CHAR_CLASS: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3299 *err = build_charclass (regexp->trans, sbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3300 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3301 mbcset, &char_class_alloc, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3302 #endif /* RE_ENABLE_I18N */ |
17270
7be3e941fb5b
regex: conform to strict C
Paul Eggert <eggert@cs.ucla.edu>
parents:
17258
diff
changeset
|
3303 (const char *) start_elem.opr.name, |
7be3e941fb5b
regex: conform to strict C
Paul Eggert <eggert@cs.ucla.edu>
parents:
17258
diff
changeset
|
3304 syntax); |
39918 | 3305 if (__glibc_unlikely (*err != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3306 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3307 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3308 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3309 assert (0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3310 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3311 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3312 } |
39918 | 3313 if (__glibc_unlikely (token->type == END_OF_RE)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3314 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3315 *err = REG_EBRACK; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3316 goto parse_bracket_exp_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3317 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3318 if (token->type == OP_CLOSE_BRACKET) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3319 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3320 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3321 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3322 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3323 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3324 /* If it is a non-matching list. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3325 if (non_match) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3326 bitset_not (sbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3327 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3328 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3329 /* Ensure only single byte characters are set. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3330 if (dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3331 bitset_mask (sbcset, dfa->sb_char); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3332 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3333 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3334 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3335 || mbcset->non_match))) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3336 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3337 bin_tree_t *mbc_tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3338 int sbc_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3339 /* Build a tree for complex bracket. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3340 dfa->has_mb_node = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3341 br_token.type = COMPLEX_BRACKET; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3342 br_token.opr.mbcset = mbcset; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3343 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); |
39918 | 3344 if (__glibc_unlikely (mbc_tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3345 goto parse_bracket_exp_espace; |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
3346 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3347 if (sbcset[sbc_idx]) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3348 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3349 /* If there are no bits set in sbcset, there is no point |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3350 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ |
6214
afb93b90dcb8
Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents:
6206
diff
changeset
|
3351 if (sbc_idx < BITSET_WORDS) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3352 { |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3353 /* Build a tree for simple bracket. */ |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3354 br_token.type = SIMPLE_BRACKET; |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3355 br_token.opr.sbcset = sbcset; |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3356 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); |
39918 | 3357 if (__glibc_unlikely (work_tree == NULL)) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3358 goto parse_bracket_exp_espace; |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3359 |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3360 /* Then join them by ALT node. */ |
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3361 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); |
39918 | 3362 if (__glibc_unlikely (work_tree == NULL)) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3363 goto parse_bracket_exp_espace; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3364 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3365 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3366 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3367 re_free (sbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3368 work_tree = mbc_tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3369 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3370 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3371 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3372 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3373 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3374 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3375 free_charset (mbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3376 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3377 /* Build a tree for simple bracket. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3378 br_token.type = SIMPLE_BRACKET; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3379 br_token.opr.sbcset = sbcset; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3380 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); |
39918 | 3381 if (__glibc_unlikely (work_tree == NULL)) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3382 goto parse_bracket_exp_espace; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3383 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3384 return work_tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3385 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3386 parse_bracket_exp_espace: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3387 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3388 parse_bracket_exp_free_return: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3389 re_free (sbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3390 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3391 free_charset (mbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3392 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3393 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3394 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3395 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3396 /* Parse an element in the bracket expression. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3397 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3398 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3399 parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3400 re_token_t *token, int token_len, re_dfa_t *dfa, |
6195
25eaa608fc4e
Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6185
diff
changeset
|
3401 reg_syntax_t syntax, bool accept_hyphen) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3402 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3403 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3404 int cur_char_size; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3405 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3406 if (cur_char_size > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3407 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3408 elem->type = MB_CHAR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3409 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3410 re_string_skip_bytes (regexp, cur_char_size); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3411 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3412 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3413 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3414 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3415 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3416 || token->type == OP_OPEN_EQUIV_CLASS) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3417 return parse_bracket_symbol (elem, regexp, token); |
39918 | 3418 if (__glibc_unlikely (token->type == OP_CHARSET_RANGE) && !accept_hyphen) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3419 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3420 /* A '-' that is not a range indicator may appear only just before |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3421 the closing bracket. Everything else is an error. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3422 re_token_t token2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3423 (void) peek_token_bracket (&token2, regexp, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3424 if (token2.type != OP_CLOSE_BRACKET) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3425 /* The actual error value is not standardized since this whole |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3426 case is undefined. But ERANGE makes good sense. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3427 return REG_ERANGE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3428 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3429 elem->type = SB_CHAR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3430 elem->opr.ch = token->opr.c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3431 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3432 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3433 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3434 /* Parse a bracket symbol in the bracket expression. Bracket symbols are |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3435 such as [:<character_class>:], [.<collating_element>.], and |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3436 [=<equivalent_class>=]. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3437 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3438 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3439 parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3440 re_token_t *token) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3441 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3442 unsigned char ch, delim = token->opr.c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3443 int i = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3444 if (re_string_eoi(regexp)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3445 return REG_EBRACK; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3446 for (;; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3447 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3448 if (i >= BRACKET_NAME_BUF_SIZE) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3449 return REG_EBRACK; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3450 if (token->type == OP_OPEN_CHAR_CLASS) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3451 ch = re_string_fetch_byte_case (regexp); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3452 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3453 ch = re_string_fetch_byte (regexp); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3454 if (re_string_eoi(regexp)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3455 return REG_EBRACK; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3456 if (ch == delim && re_string_peek_byte (regexp, 0) == ']') |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3457 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3458 elem->opr.name[i] = ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3459 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3460 re_string_skip_bytes (regexp, 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3461 elem->opr.name[i] = '\0'; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3462 switch (token->type) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3463 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3464 case OP_OPEN_COLL_ELEM: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3465 elem->type = COLL_SYM; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3466 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3467 case OP_OPEN_EQUIV_CLASS: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3468 elem->type = EQUIV_CLASS; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3469 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3470 case OP_OPEN_CHAR_CLASS: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3471 elem->type = CHAR_CLASS; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3472 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3473 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3474 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3475 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3476 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3477 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3478 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3479 /* Helper function for parse_bracket_exp. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3480 Build the equivalence class which is represented by NAME. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3481 The results are written to MBCSET and SBCSET. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3482 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, |
16358 | 3483 and is a pointer argument since we may update it. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3484 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3485 static reg_errcode_t |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3486 #ifdef RE_ENABLE_I18N |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3487 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3488 Idx *equiv_class_alloc, const unsigned char *name) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3489 #else /* not RE_ENABLE_I18N */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3490 build_equiv_class (bitset_t sbcset, const unsigned char *name) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3491 #endif /* not RE_ENABLE_I18N */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3492 { |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3493 #ifdef _LIBC |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3494 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3495 if (nrules != 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3496 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3497 const int32_t *table, *indirect; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3498 const unsigned char *weights, *extra, *cp; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3499 unsigned char char_buf[2]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3500 int32_t idx1, idx2; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3501 unsigned int ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3502 size_t len; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3503 /* Calculate the index for equivalence class. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3504 cp = name; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3505 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3506 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3507 _NL_COLLATE_WEIGHTMB); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3508 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3509 _NL_COLLATE_EXTRAMB); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3510 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3511 _NL_COLLATE_INDIRECTMB); |
18093
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
3512 idx1 = findidx (table, indirect, extra, &cp, -1); |
39918 | 3513 if (__glibc_unlikely (idx1 == 0 || *cp != '\0')) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3514 /* This isn't a valid character. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3515 return REG_ECOLLATE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3516 |
16358 | 3517 /* Build single byte matching table for this equivalence class. */ |
12572
b11c0a312a68
regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents:
12571
diff
changeset
|
3518 len = weights[idx1 & 0xffffff]; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3519 for (ch = 0; ch < SBC_MAX; ++ch) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3520 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3521 char_buf[0] = ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3522 cp = char_buf; |
18093
00853c226336
regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
17848
diff
changeset
|
3523 idx2 = findidx (table, indirect, extra, &cp, 1); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3524 /* |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3525 idx2 = table[ch]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3526 */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3527 if (idx2 == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3528 /* This isn't a valid character. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3529 continue; |
12572
b11c0a312a68
regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents:
12571
diff
changeset
|
3530 /* Compare only if the length matches and the collation rule |
b11c0a312a68
regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents:
12571
diff
changeset
|
3531 index is the same. */ |
39758 | 3532 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24) |
3533 && memcmp (weights + (idx1 & 0xffffff) + 1, | |
3534 weights + (idx2 & 0xffffff) + 1, len) == 0) | |
3535 bitset_set (sbcset, ch); | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3536 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3537 /* Check whether the array has enough space. */ |
39918 | 3538 if (__glibc_unlikely (*equiv_class_alloc == mbcset->nequiv_classes)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3539 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3540 /* Not enough, realloc it. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3541 /* +1 in case mbcset->nequiv_classes is 0. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3542 Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3543 /* Use realloc since the array is NULL if *alloc == 0. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3544 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3545 int32_t, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3546 new_equiv_class_alloc); |
39918 | 3547 if (__glibc_unlikely (new_equiv_classes == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3548 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3549 mbcset->equiv_classes = new_equiv_classes; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3550 *equiv_class_alloc = new_equiv_class_alloc; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3551 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3552 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3553 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3554 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3555 #endif /* _LIBC */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3556 { |
39918 | 3557 if (__glibc_unlikely (strlen ((const char *) name) != 1)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3558 return REG_ECOLLATE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3559 bitset_set (sbcset, *name); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3560 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3561 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3562 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3563 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3564 /* Helper function for parse_bracket_exp. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3565 Build the character class which is represented by NAME. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3566 The results are written to MBCSET and SBCSET. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3567 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, |
16358 | 3568 and is a pointer argument since we may update it. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3569 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3570 static reg_errcode_t |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3571 #ifdef RE_ENABLE_I18N |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3572 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
3573 re_charset_t *mbcset, Idx *char_class_alloc, |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
3574 const char *class_name, reg_syntax_t syntax) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3575 #else /* not RE_ENABLE_I18N */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3576 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
3577 const char *class_name, reg_syntax_t syntax) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3578 #endif /* not RE_ENABLE_I18N */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3579 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3580 int i; |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
3581 const char *name = class_name; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3582 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3583 /* In case of REG_ICASE, "upper" and "lower" each match both |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3584 upper and lower cases. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3585 if ((syntax & RE_ICASE) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3586 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3587 name = "alpha"; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3588 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3589 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3590 /* Check the space of the arrays. */ |
39918 | 3591 if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3592 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3593 /* Not enough, realloc it. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3594 /* +1 in case mbcset->nchar_classes is 0. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3595 Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3596 /* Use realloc since array is NULL if *alloc == 0. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3597 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3598 new_char_class_alloc); |
39918 | 3599 if (__glibc_unlikely (new_char_classes == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3600 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3601 mbcset->char_classes = new_char_classes; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3602 *char_class_alloc = new_char_class_alloc; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3603 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3604 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3605 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3606 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3607 #define BUILD_CHARCLASS_LOOP(ctype_func) \ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3608 do { \ |
39918 | 3609 if (__glibc_unlikely (trans != NULL)) \ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3610 { \ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3611 for (i = 0; i < SBC_MAX; ++i) \ |
6729
c5495b5c3f32
Fix space-tab problem. From Jim Meyering.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
3612 if (ctype_func (i)) \ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3613 bitset_set (sbcset, trans[i]); \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3614 } \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3615 else \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3616 { \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3617 for (i = 0; i < SBC_MAX; ++i) \ |
6729
c5495b5c3f32
Fix space-tab problem. From Jim Meyering.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
3618 if (ctype_func (i)) \ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3619 bitset_set (sbcset, i); \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3620 } \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3621 } while (0) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3622 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3623 if (strcmp (name, "alnum") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3624 BUILD_CHARCLASS_LOOP (isalnum); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3625 else if (strcmp (name, "cntrl") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3626 BUILD_CHARCLASS_LOOP (iscntrl); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3627 else if (strcmp (name, "lower") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3628 BUILD_CHARCLASS_LOOP (islower); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3629 else if (strcmp (name, "space") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3630 BUILD_CHARCLASS_LOOP (isspace); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3631 else if (strcmp (name, "alpha") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3632 BUILD_CHARCLASS_LOOP (isalpha); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3633 else if (strcmp (name, "digit") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3634 BUILD_CHARCLASS_LOOP (isdigit); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3635 else if (strcmp (name, "print") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3636 BUILD_CHARCLASS_LOOP (isprint); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3637 else if (strcmp (name, "upper") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3638 BUILD_CHARCLASS_LOOP (isupper); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3639 else if (strcmp (name, "blank") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3640 BUILD_CHARCLASS_LOOP (isblank); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3641 else if (strcmp (name, "graph") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3642 BUILD_CHARCLASS_LOOP (isgraph); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3643 else if (strcmp (name, "punct") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3644 BUILD_CHARCLASS_LOOP (ispunct); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3645 else if (strcmp (name, "xdigit") == 0) |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3646 BUILD_CHARCLASS_LOOP (isxdigit); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3647 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3648 return REG_ECTYPE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3649 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3650 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3651 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3652 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3653 static bin_tree_t * |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3654 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, |
17258
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
3655 const char *class_name, |
28b073aabf32
regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents:
17249
diff
changeset
|
3656 const char *extra, bool non_match, |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3657 reg_errcode_t *err) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3658 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3659 re_bitset_ptr_t sbcset; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3660 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3661 re_charset_t *mbcset; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
3662 Idx alloc = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3663 #endif /* not RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3664 reg_errcode_t ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3665 re_token_t br_token; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3666 bin_tree_t *tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3667 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6236
diff
changeset
|
3668 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); |
39918 | 3669 if (__glibc_unlikely (sbcset == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3670 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3671 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3672 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3673 } |
18217 | 3674 #ifdef RE_ENABLE_I18N |
3675 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); | |
39918 | 3676 if (__glibc_unlikely (mbcset == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3677 { |
18217 | 3678 re_free (sbcset); |
3679 *err = REG_ESPACE; | |
3680 return NULL; | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3681 } |
18217 | 3682 mbcset->non_match = non_match; |
3683 #endif /* RE_ENABLE_I18N */ | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3684 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3685 /* We don't care the syntax in this case. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3686 ret = build_charclass (trans, sbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3687 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3688 mbcset, &alloc, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3689 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3690 class_name, 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3691 |
39918 | 3692 if (__glibc_unlikely (ret != REG_NOERROR)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3693 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3694 re_free (sbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3695 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3696 free_charset (mbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3697 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3698 *err = ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3699 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3700 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3701 /* \w match '_' also. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3702 for (; *extra; extra++) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3703 bitset_set (sbcset, *extra); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3704 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3705 /* If it is non-matching list. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3706 if (non_match) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3707 bitset_not (sbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3708 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3709 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3710 /* Ensure only single byte characters are set. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3711 if (dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3712 bitset_mask (sbcset, dfa->sb_char); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3713 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3714 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3715 /* Build a tree for simple bracket. */ |
18325 | 3716 #if defined GCC_LINT || defined lint |
18219
5459f9989448
regex: pacify static checkers
Paul Eggert <eggert@cs.ucla.edu>
parents:
18218
diff
changeset
|
3717 memset (&br_token, 0, sizeof br_token); |
5459f9989448
regex: pacify static checkers
Paul Eggert <eggert@cs.ucla.edu>
parents:
18218
diff
changeset
|
3718 #endif |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3719 br_token.type = SIMPLE_BRACKET; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3720 br_token.opr.sbcset = sbcset; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3721 tree = create_token_tree (dfa, NULL, NULL, &br_token); |
39918 | 3722 if (__glibc_unlikely (tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3723 goto build_word_op_espace; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3724 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3725 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3726 if (dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3727 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3728 bin_tree_t *mbc_tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3729 /* Build a tree for complex bracket. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3730 br_token.type = COMPLEX_BRACKET; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3731 br_token.opr.mbcset = mbcset; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3732 dfa->has_mb_node = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3733 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); |
39918 | 3734 if (__glibc_unlikely (mbc_tree == NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3735 goto build_word_op_espace; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3736 /* Then join them by ALT node. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3737 tree = create_tree (dfa, tree, mbc_tree, OP_ALT); |
39918 | 3738 if (__glibc_likely (mbc_tree != NULL)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3739 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3740 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3741 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3742 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3743 free_charset (mbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3744 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3745 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3746 #else /* not RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3747 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3748 #endif /* not RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3749 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3750 build_word_op_espace: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3751 re_free (sbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3752 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3753 free_charset (mbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3754 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3755 *err = REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3756 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3757 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3758 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3759 /* This is intended for the expressions like "a{1,3}". |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
3760 Fetch a number from 'input', and return the number. |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
3761 Return -1 if the number field is empty like "{,1}". |
16705
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
3762 Return RE_DUP_MAX + 1 if the number field is too large. |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
3763 Return -2 if an error occurred. */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
3764 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6174
diff
changeset
|
3765 static Idx |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3766 fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3767 { |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
3768 Idx num = -1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3769 unsigned char c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3770 while (1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3771 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3772 fetch_token (token, input, syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3773 c = token->opr.c; |
39918 | 3774 if (__glibc_unlikely (token->type == END_OF_RE)) |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
3775 return -2; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3776 if (token->type == OP_CLOSE_DUP_NUM || c == ',') |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3777 break; |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
3778 num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) |
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
3779 ? -2 |
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
3780 : num == -1 |
16705
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
3781 ? c - '0' |
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
3782 : MIN (RE_DUP_MAX + 1, num * 10 + c - '0')); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3783 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3784 return num; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3785 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3786 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3787 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3788 static void |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3789 free_charset (re_charset_t *cset) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3790 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3791 re_free (cset->mbchars); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3792 # ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3793 re_free (cset->coll_syms); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3794 re_free (cset->equiv_classes); |
39758 | 3795 # endif |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3796 re_free (cset->range_starts); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3797 re_free (cset->range_ends); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3798 re_free (cset->char_classes); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3799 re_free (cset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3800 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3801 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3802 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3803 /* Functions for binary tree operation. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3804 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3805 /* Create a tree node. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3806 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3807 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3808 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3809 re_token_type_t type) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3810 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3811 re_token_t t; |
18325 | 3812 #if defined GCC_LINT || defined lint |
18219
5459f9989448
regex: pacify static checkers
Paul Eggert <eggert@cs.ucla.edu>
parents:
18218
diff
changeset
|
3813 memset (&t, 0, sizeof t); |
5459f9989448
regex: pacify static checkers
Paul Eggert <eggert@cs.ucla.edu>
parents:
18218
diff
changeset
|
3814 #endif |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3815 t.type = type; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3816 return create_token_tree (dfa, left, right, &t); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3817 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3818 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3819 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3820 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3821 const re_token_t *token) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3822 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3823 bin_tree_t *tree; |
39918 | 3824 if (__glibc_unlikely (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3825 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3826 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3827 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3828 if (storage == NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3829 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3830 storage->next = dfa->str_tree_storage; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3831 dfa->str_tree_storage = storage; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3832 dfa->str_tree_storage_idx = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3833 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3834 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3835 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3836 tree->parent = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3837 tree->left = left; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3838 tree->right = right; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3839 tree->token = *token; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3840 tree->token.duplicated = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3841 tree->token.opt_subexp = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3842 tree->first = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3843 tree->next = NULL; |
18253
8367bee10021
regex: make it closer to libc
Paul Eggert <eggert@cs.ucla.edu>
parents:
18252
diff
changeset
|
3844 tree->node_idx = -1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3845 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3846 if (left != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3847 left->parent = tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3848 if (right != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3849 right->parent = tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3850 return tree; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3851 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3852 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3853 /* Mark the tree SRC as an optional subexpression. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3854 To be called from preorder or postorder. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3855 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3856 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3857 mark_opt_subexp (void *extra, bin_tree_t *node) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3858 { |
16912
1591c84dbb2d
regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents:
16882
diff
changeset
|
3859 Idx idx = (uintptr_t) extra; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3860 if (node->token.type == SUBEXP && node->token.opr.idx == idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3861 node->token.opt_subexp = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3862 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3863 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3864 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3865 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3866 /* Free the allocated memory inside NODE. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3867 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3868 static void |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3869 free_token (re_token_t *node) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3870 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3871 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3872 if (node->type == COMPLEX_BRACKET && node->duplicated == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3873 free_charset (node->opr.mbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3874 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3875 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3876 if (node->type == SIMPLE_BRACKET && node->duplicated == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3877 re_free (node->opr.sbcset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3878 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3879 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3880 /* Worker function for tree walking. Free the allocated memory inside NODE |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3881 and its children. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3882 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3883 static reg_errcode_t |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3884 free_tree (void *extra, bin_tree_t *node) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3885 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3886 free_token (&node->token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3887 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3888 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3889 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3890 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3891 /* Duplicate the node SRC, and return new node. This is a preorder |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3892 visit similar to the one implemented by the generic visitor, but |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3893 we need more infrastructure to maintain two parallel trees --- so, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3894 it's easier to duplicate. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3895 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3896 static bin_tree_t * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
3897 duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3898 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3899 const bin_tree_t *node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3900 bin_tree_t *dup_root; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3901 bin_tree_t **p_new = &dup_root, *dup_node = root->parent; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3902 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3903 for (node = root; ; ) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3904 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3905 /* Create a new tree and link it back to the current parent. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3906 *p_new = create_token_tree (dfa, NULL, NULL, &node->token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3907 if (*p_new == NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3908 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3909 (*p_new)->parent = dup_node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3910 (*p_new)->token.duplicated = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3911 dup_node = *p_new; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3912 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3913 /* Go to the left node, or up and to the right. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3914 if (node->left) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3915 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3916 node = node->left; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3917 p_new = &dup_node->left; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3918 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3919 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3920 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3921 const bin_tree_t *prev = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3922 while (node->right == prev || node->right == NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3923 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3924 prev = node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3925 node = node->parent; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3926 dup_node = dup_node->parent; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3927 if (!node) |
12830
b8269a183e3e
regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents:
12829
diff
changeset
|
3928 return dup_root; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3929 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3930 node = node->right; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3931 p_new = &dup_node->right; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3932 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3933 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3934 } |