annotate lib/regcomp.c @ 17412:6105f1dfb98e

c-ctype, regex, verify: port to gcc -std=c90 -pedantic. Avoid constructions that are rejected by gcc -std=c90 -pedantic. This fixes a porting bug I recently reintroduced in regex, and some other instances that I discovered while testing the fix. * lib/c-ctype.h [__STRICT_ANSI__]: Avoid ({ ... }). * lib/regcomp.c (utf8_sb_map) [__STRICT_ANSI__]: Avoid [0 ... N] = E. * lib/regex_internal.h [!_LIBC && GNULIB_LOCK]: Do not use a macro with an empty argument if this is a pedantic pre-C99 GCC. * lib/verify.h: Do not use _Static_assert if this is a pedantic pre-C11 GCC.
author Paul Eggert <eggert@cs.ucla.edu>
date Wed, 29 May 2013 18:48:09 -0700
parents 020c917cba9d
children 344018b6e5d7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1 /* Extended regular expression matching and search library.
17249
e542fd46ad6f maint: update all copyright year number ranges
Eric Blake <eblake@redhat.com>
parents: 17237
diff changeset
2 Copyright (C) 2002-2013 Free Software Foundation, Inc.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3 This file is part of the GNU C Library.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
5
17233
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
6 The GNU C Library is free software; you can redistribute it and/or
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
7 modify it under the terms of the GNU Lesser General Public
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
8 License as published by the Free Software Foundation; either
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
9 version 2.1 of the License, or (at your option) any later version.
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
10
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
11 The GNU C Library is distributed in the hope that it will be useful,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
17233
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
14 Lesser General Public License for more details.
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
15
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
16 You should have received a copy of the GNU Lesser General Public
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
17 License along with the GNU C Library; if not, see
b6b08f30c630 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16950
diff changeset
18 <http://www.gnu.org/licenses/>. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
19
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
20 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
21 size_t length, reg_syntax_t syntax);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
22 static void re_compile_fastmap_iter (regex_t *bufp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
23 const re_dfastate_t *init_state,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
24 char *fastmap);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
25 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
26 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
27 static void free_charset (re_charset_t *cset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
28 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
29 static void free_workarea_compile (regex_t *preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
30 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
31 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
32 static void optimize_utf8 (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
33 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
34 static reg_errcode_t analyze (regex_t *preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
35 static reg_errcode_t preorder (bin_tree_t *root,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
36 reg_errcode_t (fn (void *, bin_tree_t *)),
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
37 void *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
38 static reg_errcode_t postorder (bin_tree_t *root,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
39 reg_errcode_t (fn (void *, bin_tree_t *)),
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
40 void *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
41 static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
42 static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
43 static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
44 bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
45 static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
46 static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
47 static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
48 static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
49 static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
50 unsigned int constraint);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
51 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
52 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
53 Idx node, bool root);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
54 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
55 static Idx fetch_number (re_string_t *input, re_token_t *token,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
56 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
57 static int peek_token (re_token_t *token, re_string_t *input,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
58 reg_syntax_t syntax) internal_function;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
59 static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
60 reg_syntax_t syntax, reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
61 static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
62 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
63 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
64 static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
65 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
66 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
67 static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
68 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
69 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
70 static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
71 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
72 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
73 static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
74 re_dfa_t *dfa, re_token_t *token,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
75 reg_syntax_t syntax, reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
76 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
77 re_token_t *token, reg_syntax_t syntax,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
78 reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
79 static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
80 re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
81 re_token_t *token, int token_len,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
82 re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
83 reg_syntax_t syntax,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
84 bool accept_hyphen);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
85 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
86 re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
87 re_token_t *token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
88 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
89 static reg_errcode_t build_equiv_class (bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
90 re_charset_t *mbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
91 Idx *equiv_class_alloc,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
92 const unsigned char *name);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
93 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
94 bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
95 re_charset_t *mbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
96 Idx *char_class_alloc,
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
97 const char *class_name,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
98 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
99 #else /* not RE_ENABLE_I18N */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
100 static reg_errcode_t build_equiv_class (bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
101 const unsigned char *name);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
102 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
103 bitset_t sbcset,
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
104 const char *class_name,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
105 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
106 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
107 static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
108 RE_TRANSLATE_TYPE trans,
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
109 const char *class_name,
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
110 const char *extra,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
111 bool non_match, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
112 static bin_tree_t *create_tree (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
113 bin_tree_t *left, bin_tree_t *right,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
114 re_token_type_t type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
115 static bin_tree_t *create_token_tree (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
116 bin_tree_t *left, bin_tree_t *right,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
117 const re_token_t *token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
118 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
119 static void free_token (re_token_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
120 static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
121 static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
122
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
123 /* This table gives an error message for each of the error codes listed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
124 in regex.h. Obviously the order here has to be the same as there.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
125 POSIX doesn't require that we do anything for REG_NOERROR,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
126 but why not be nice? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
127
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
128 static const char __re_error_msgid[] =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
129 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
130 #define REG_NOERROR_IDX 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
131 gettext_noop ("Success") /* REG_NOERROR */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
132 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
133 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
134 gettext_noop ("No match") /* REG_NOMATCH */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
135 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
136 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
137 gettext_noop ("Invalid regular expression") /* REG_BADPAT */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
138 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
139 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
140 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
141 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
142 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
143 gettext_noop ("Invalid character class name") /* REG_ECTYPE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
144 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
145 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
146 gettext_noop ("Trailing backslash") /* REG_EESCAPE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
147 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
148 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
149 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
150 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
151 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
152 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
153 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
154 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
155 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
156 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
157 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
158 gettext_noop ("Unmatched \\{") /* REG_EBRACE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
159 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
160 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
161 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
162 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
163 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
164 gettext_noop ("Invalid range end") /* REG_ERANGE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
165 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
166 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
167 gettext_noop ("Memory exhausted") /* REG_ESPACE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
168 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
169 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
170 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
171 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
172 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
173 gettext_noop ("Premature end of regular expression") /* REG_EEND */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
174 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
175 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
176 gettext_noop ("Regular expression too big") /* REG_ESIZE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
177 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
178 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
179 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
180 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
181
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
182 static const size_t __re_error_msgid_idx[] =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
183 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
184 REG_NOERROR_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
185 REG_NOMATCH_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
186 REG_BADPAT_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
187 REG_ECOLLATE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
188 REG_ECTYPE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
189 REG_EESCAPE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
190 REG_ESUBREG_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
191 REG_EBRACK_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
192 REG_EPAREN_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
193 REG_EBRACE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
194 REG_BADBR_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
195 REG_ERANGE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
196 REG_ESPACE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
197 REG_BADRPT_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
198 REG_EEND_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
199 REG_ESIZE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
200 REG_ERPAREN_IDX
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
201 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
202
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
203 /* Entry points for GNU code. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
204
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
205 /* re_compile_pattern is the GNU regular expression compiler: it
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
206 compiles PATTERN (of length LENGTH) and puts the result in BUFP.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
207 Returns 0 if the pattern was valid, otherwise an error string.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
208
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
209 Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
210 are set in BUFP on entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
211
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
212 #ifdef _LIBC
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
213 const char *
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
214 re_compile_pattern (pattern, length, bufp)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
215 const char *pattern;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
216 size_t length;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
217 struct re_pattern_buffer *bufp;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
218 #else /* size_t might promote */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
219 const char *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
220 re_compile_pattern (const char *pattern, size_t length,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
221 struct re_pattern_buffer *bufp)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
222 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
223 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
224 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
225
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
226 /* And GNU code determines whether or not to get register information
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
227 by passing null for the REGS argument to re_match, etc., not by
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
228 setting no_sub, unless RE_NO_SUB is set. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
229 bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
230
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
231 /* Match anchors at newline. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
232 bufp->newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
233
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
234 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
235
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
236 if (!ret)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
237 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
238 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
239 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
240 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
241 weak_alias (__re_compile_pattern, re_compile_pattern)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
242 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
243
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
244 /* Set by 're_set_syntax' to the current regexp syntax to recognize. Can
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
245 also be assigned to arbitrarily: each pattern buffer stores its own
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
246 syntax, so it can be changed between regex compilations. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
247 /* This has no initializer because initialized variables in Emacs
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
248 become read-only after dumping. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
249 reg_syntax_t re_syntax_options;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
250
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
251
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
252 /* Specify the precise syntax of regexps for compilation. This provides
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
253 for compatibility for various utilities which historically have
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
254 different, incompatible syntaxes.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
255
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
256 The argument SYNTAX is a bit mask comprised of the various bits
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
257 defined in regex.h. We return the old syntax. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
258
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
259 reg_syntax_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
260 re_set_syntax (syntax)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
261 reg_syntax_t syntax;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
262 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
263 reg_syntax_t ret = re_syntax_options;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
264
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
265 re_syntax_options = syntax;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
266 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
267 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
268 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
269 weak_alias (__re_set_syntax, re_set_syntax)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
270 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
271
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
272 int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
273 re_compile_fastmap (bufp)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
274 struct re_pattern_buffer *bufp;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
275 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
276 re_dfa_t *dfa = bufp->buffer;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
277 char *fastmap = bufp->fastmap;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
278
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
279 memset (fastmap, '\0', sizeof (char) * SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
280 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
281 if (dfa->init_state != dfa->init_state_word)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
282 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
283 if (dfa->init_state != dfa->init_state_nl)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
284 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
285 if (dfa->init_state != dfa->init_state_begbuf)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
286 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
287 bufp->fastmap_accurate = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
288 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
289 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
290 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
291 weak_alias (__re_compile_fastmap, re_compile_fastmap)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
292 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
293
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Mark byte CH in FASTMAP.  When ICASE is true, also mark the byte
   that tolower maps CH to.  */
static inline void
__attribute__ ((always_inline))
re_set_fastmap (char *fastmap, bool icase, int ch)
{
  fastmap[ch] = 1;

  /* Case-sensitive: the byte itself is all there is to record.  */
  if (!icase)
    return;

  fastmap[tolower (ch)] = 1;
}
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
302
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
303 /* Helper function for re_compile_fastmap.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
304 Compile fastmap for the initial_state INIT_STATE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
305
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
306 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
307 re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
308 char *fastmap)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
309 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
310 re_dfa_t *dfa = bufp->buffer;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
311 Idx node_cnt;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
312 bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
313 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
314 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
315 Idx node = init_state->nodes.elems[node_cnt];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
316 re_token_type_t type = dfa->nodes[node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
317
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
318 if (type == CHARACTER)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
319 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
320 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
321 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
322 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
323 {
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6104
diff changeset
324 unsigned char buf[MB_LEN_MAX];
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6104
diff changeset
325 unsigned char *p;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
326 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
327 mbstate_t state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
328
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
329 p = buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
330 *p++ = dfa->nodes[node].opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
331 while (++node < dfa->nodes_len
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
332 && dfa->nodes[node].type == CHARACTER
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
333 && dfa->nodes[node].mb_partial)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
334 *p++ = dfa->nodes[node].opr.c;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
335 memset (&state, '\0', sizeof (state));
10998
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10078
diff changeset
336 if (__mbrtowc (&wc, (const char *) buf, p - buf,
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10078
diff changeset
337 &state) == p - buf
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
338 && (__wcrtomb ((char *) buf, towlower (wc), &state)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
339 != (size_t) -1))
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
340 re_set_fastmap (fastmap, false, buf[0]);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
341 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
342 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
343 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
344 else if (type == SIMPLE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
345 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
346 int i, ch;
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
347 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
348 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
349 int j;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
350 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
351 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
352 if (w & ((bitset_word_t) 1 << j))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
353 re_set_fastmap (fastmap, icase, ch);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
354 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
355 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
356 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
357 else if (type == COMPLEX_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
358 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
359 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
360 Idx i;
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
361
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
362 # ifdef _LIBC
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
363 /* See if we have to try all bytes which start multiple collation
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
364 elements.
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
365 e.g. In da_DK, we want to catch 'a' since "aa" is a valid
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
366 collation element, and don't catch 'b' since 'b' is
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
367 the only collation element which starts from 'b' (and
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
368 it is caught by SIMPLE_BRACKET). */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
369 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
370 && (cset->ncoll_syms || cset->nranges))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
371 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
372 const int32_t *table = (const int32_t *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
374 for (i = 0; i < SBC_MAX; ++i)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
375 if (table[i] < 0)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
376 re_set_fastmap (fastmap, icase, i);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
377 }
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
378 # endif /* _LIBC */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
379
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
380 /* See if we have to start the match at all multibyte characters,
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
381 i.e. where we would not find an invalid sequence. This only
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
382 applies to multibyte character sets; for single byte character
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
383 sets, the SIMPLE_BRACKET again suffices. */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
384 if (dfa->mb_cur_max > 1
12351
0b80ccdc9aa4 regex: Fix fastmap for multibyte character ranges.
Paolo Bonzini <bonzini@gnu.org>
parents: 11000
diff changeset
385 && (cset->nchar_classes || cset->non_match || cset->nranges
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
386 # ifdef _LIBC
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
387 || cset->nequiv_classes
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
388 # endif /* _LIBC */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
389 ))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
390 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
391 unsigned char c = 0;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
392 do
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
393 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
394 mbstate_t mbs;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
395 memset (&mbs, 0, sizeof (mbs));
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
396 if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
397 re_set_fastmap (fastmap, false, (int) c);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
398 }
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
399 while (++c != 0);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
400 }
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
401
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
402 else
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
403 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
404 /* ... Else catch all bytes which can start the mbchars. */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
405 for (i = 0; i < cset->nmbchars; ++i)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
406 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
407 char buf[256];
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
408 mbstate_t state;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
409 memset (&state, '\0', sizeof (state));
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
410 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
411 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
412 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
413 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
414 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
415 != (size_t) -1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
416 re_set_fastmap (fastmap, false, *(unsigned char *) buf);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
417 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
418 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
419 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
420 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
421 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
422 else if (type == OP_PERIOD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
423 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
424 || type == OP_UTF8_PERIOD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
425 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
426 || type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
427 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
428 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
429 if (type == END_OF_RE)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
430 bufp->can_be_null = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
431 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
432 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
433 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
434 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
435
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
436 /* Entry point for POSIX code. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
437 /* regcomp takes a regular expression as a string and compiles it.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
438
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
439 PREG is a regex_t *. We do not expect any fields to be initialized,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
440 since POSIX says we shouldn't. Thus, we set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
441
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
442 'buffer' to the compiled pattern;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
443 'used' to the length of the compiled pattern;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
444 'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
445 REG_EXTENDED bit in CFLAGS is set; otherwise, to
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
446 RE_SYNTAX_POSIX_BASIC;
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
447 'newline_anchor' to REG_NEWLINE being set in CFLAGS;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
448 'fastmap' to an allocated space for the fastmap;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
449 'fastmap_accurate' to zero;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
450 're_nsub' to the number of subexpressions in PATTERN.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
451
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
452 PATTERN is the address of the pattern string.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
453
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
454 CFLAGS is a series of bits which affect compilation.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
455
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
456 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
457 use POSIX basic syntax.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
458
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
459 If REG_NEWLINE is set, then . and [^...] don't match newline.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
460 Also, regexec will try a match beginning after every newline.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
461
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
462 If REG_ICASE is set, then we consider upper- and lowercase
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
463 versions of letters to be equivalent when matching.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
464
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
465 If REG_NOSUB is set, then when PREG is passed to regexec, that
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
466 routine will report only success or failure, and nothing about the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
467 registers.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
468
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
469 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
470 the return codes and their meanings.) */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
471
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
472 int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
473 regcomp (preg, pattern, cflags)
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
474 regex_t *_Restrict_ preg;
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
475 const char *_Restrict_ pattern;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
476 int cflags;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
477 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
478 reg_errcode_t ret;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
479 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
480 : RE_SYNTAX_POSIX_BASIC);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
481
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
482 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
483 preg->allocated = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
484 preg->used = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
485
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
486 /* Try to allocate space for the fastmap. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
487 preg->fastmap = re_malloc (char, SBC_MAX);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
488 if (BE (preg->fastmap == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
489 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
490
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
491 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
492
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
493 /* If REG_NEWLINE is set, newlines are treated differently. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
494 if (cflags & REG_NEWLINE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
495 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
496 syntax &= ~RE_DOT_NEWLINE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
497 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
498 /* It also changes the matching behavior. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
499 preg->newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
500 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
501 else
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
502 preg->newline_anchor = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
503 preg->no_sub = !!(cflags & REG_NOSUB);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
504 preg->translate = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
505
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
506 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
507
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
508 /* POSIX doesn't distinguish between an unmatched open-group and an
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
509 unmatched close-group: both are REG_EPAREN. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
510 if (ret == REG_ERPAREN)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
511 ret = REG_EPAREN;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
512
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
513 /* We have already checked preg->fastmap != NULL. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
514 if (BE (ret == REG_NOERROR, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
515 /* Compute the fastmap now, since regexec cannot modify the pattern
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
516 buffer. This function never fails in this implementation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
517 (void) re_compile_fastmap (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
518 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
519 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
520 /* Some error occurred while compiling the expression. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
521 re_free (preg->fastmap);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
522 preg->fastmap = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
523 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
524
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
525 return (int) ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
526 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
527 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
528 weak_alias (__regcomp, regcomp)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
529 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
530
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
531 /* Returns a message corresponding to an error code, ERRCODE, returned
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
532 from either regcomp or regexec. We don't use PREG here. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
533
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
534 #ifdef _LIBC
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
535 size_t
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
536 regerror (errcode, preg, errbuf, errbuf_size)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
537 int errcode;
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
538 const regex_t *_Restrict_ preg;
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
539 char *_Restrict_ errbuf;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
540 size_t errbuf_size;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
541 #else /* size_t might promote */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
542 size_t
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
543 regerror (int errcode, const regex_t *_Restrict_ preg,
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
544 char *_Restrict_ errbuf, size_t errbuf_size)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
545 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
546 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
547 const char *msg;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
548 size_t msg_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
549
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
550 if (BE (errcode < 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
551 || errcode >= (int) (sizeof (__re_error_msgid_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
552 / sizeof (__re_error_msgid_idx[0])), 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
553 /* Only error codes returned by the rest of the code should be passed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
554 to this routine. If we are given anything else, or if other regex
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
555 code generates an invalid error code, then the program has a bug.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
556 Dump core so we can fix it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
557 abort ();
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
558
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
559 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
560
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
561 msg_size = strlen (msg) + 1; /* Includes the null. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
562
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
563 if (BE (errbuf_size != 0, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
564 {
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
565 size_t cpy_size = msg_size;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
566 if (BE (msg_size > errbuf_size, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
567 {
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
568 cpy_size = errbuf_size - 1;
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
569 errbuf[cpy_size] = '\0';
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
570 }
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
571 memcpy (errbuf, msg, cpy_size);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
572 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
573
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
574 return msg_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
575 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
576 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
577 weak_alias (__regerror, regerror)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
578 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
579
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
580
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
581 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
582 /* This static array is used for the map to single-byte characters when
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
583 UTF-8 is used. Otherwise we would allocate memory just to initialize
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
584 it the same way every time. UTF-8 is the preferred encoding so this is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
585 a worthwhile optimization. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
586 static const bitset_t utf8_sb_map =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
587 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
588 /* Set the first 128 bits. */
17412
6105f1dfb98e c-ctype, regex, verify: port to gcc -std=c90 -pedantic
Paul Eggert <eggert@cs.ucla.edu>
parents: 17408
diff changeset
589 # if defined __GNUC__ && !defined __STRICT_ANSI__
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
590 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
591 # else
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
592 # if 4 * BITSET_WORD_BITS < ASCII_CHARS
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
593 # error "bitset_word_t is narrower than 32 bits"
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
594 # elif 3 * BITSET_WORD_BITS < ASCII_CHARS
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
595 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
596 # elif 2 * BITSET_WORD_BITS < ASCII_CHARS
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
597 BITSET_WORD_MAX, BITSET_WORD_MAX,
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
598 # elif 1 * BITSET_WORD_BITS < ASCII_CHARS
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
599 BITSET_WORD_MAX,
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
600 # endif
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
601 (BITSET_WORD_MAX
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
602 >> (SBC_MAX % BITSET_WORD_BITS == 0
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
603 ? 0
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
604 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
605 # endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
606 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
607 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
608
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
609
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
610 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
611 free_dfa_content (re_dfa_t *dfa)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
612 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
613 Idx i, j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
614
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
615 if (dfa->nodes)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
616 for (i = 0; i < dfa->nodes_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
617 free_token (dfa->nodes + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
618 re_free (dfa->nexts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
619 for (i = 0; i < dfa->nodes_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
620 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
621 if (dfa->eclosures != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
622 re_node_set_free (dfa->eclosures + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
623 if (dfa->inveclosures != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
624 re_node_set_free (dfa->inveclosures + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
625 if (dfa->edests != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
626 re_node_set_free (dfa->edests + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
627 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
628 re_free (dfa->edests);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
629 re_free (dfa->eclosures);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
630 re_free (dfa->inveclosures);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
631 re_free (dfa->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
632
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
633 if (dfa->state_table)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
634 for (i = 0; i <= dfa->state_hash_mask; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
635 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
636 struct re_state_table_entry *entry = dfa->state_table + i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
637 for (j = 0; j < entry->num; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
638 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
639 re_dfastate_t *state = entry->array[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
640 free_state (state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
641 }
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
642 re_free (entry->array);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
643 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
644 re_free (dfa->state_table);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
645 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
646 if (dfa->sb_char != utf8_sb_map)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
647 re_free (dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
648 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
649 re_free (dfa->subexp_map);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
650 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
651 re_free (dfa->re_str);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
652 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
653
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
654 re_free (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
655 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
656
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
657
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
658 /* Free dynamically allocated space used by PREG. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
659
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
660 void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
661 regfree (preg)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
662 regex_t *preg;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
663 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
664 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
665 if (BE (dfa != NULL, 1))
17408
020c917cba9d regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents: 17346
diff changeset
666 {
020c917cba9d regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents: 17346
diff changeset
667 lock_fini (dfa->lock);
020c917cba9d regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents: 17346
diff changeset
668 free_dfa_content (dfa);
020c917cba9d regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents: 17346
diff changeset
669 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
670 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
671 preg->allocated = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
672
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
673 re_free (preg->fastmap);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
674 preg->fastmap = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
675
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
676 re_free (preg->translate);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
677 preg->translate = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
678 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
679 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
680 weak_alias (__regfree, regfree)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
681 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
682
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
683 /* Entry points compatible with 4.2 BSD regex library. We don't define
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
684 them unless specifically requested. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
685
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
686 #if defined _REGEX_RE_COMP || defined _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
687
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
688 /* BSD has one and only one pattern buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
689 static struct re_pattern_buffer re_comp_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
690
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
691 char *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
692 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
693 /* Make these definitions weak in libc, so POSIX programs can redefine
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
694 these names if they don't use our functions, and still use
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
695 regcomp/regexec above without link errors. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
696 weak_function
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
697 # endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
698 re_comp (s)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
699 const char *s;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
700 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
701 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
702 char *fastmap;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
703
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
704 if (!s)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
705 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
706 if (!re_comp_buf.buffer)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
707 return gettext ("No previous regular expression");
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
708 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
709 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
710
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
711 if (re_comp_buf.buffer)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
712 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
713 fastmap = re_comp_buf.fastmap;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
714 re_comp_buf.fastmap = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
715 __regfree (&re_comp_buf);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
716 memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
717 re_comp_buf.fastmap = fastmap;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
718 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
719
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
720 if (re_comp_buf.fastmap == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
721 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
722 re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
723 if (re_comp_buf.fastmap == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
724 return (char *) gettext (__re_error_msgid
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
725 + __re_error_msgid_idx[(int) REG_ESPACE]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
726 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
727
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
728 /* Since 're_exec' always passes NULL for the 'regs' argument, we
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
729 don't need to initialize the pattern buffer fields which affect it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
730
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
731 /* Match anchors at newlines. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
732 re_comp_buf.newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
733
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
734 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
735
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
736 if (!ret)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
737 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
738
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
739 /* Yes, we're discarding 'const' here if !HAVE_LIBINTL. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
740 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
741 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
742
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
#ifdef _LIBC
/* Cleanup routine declared through libc_freeres_fn: releases whatever
   the static re_comp pattern buffer currently holds.
   NOTE(review): presumably invoked when libc frees its own resources
   at exit -- confirm against the libc_freeres_fn definition.  */
libc_freeres_fn (free_mem)
{
  __regfree (&re_comp_buf);
}
#endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
749
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
750 #endif /* _REGEX_RE_COMP */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
751
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
752 /* Internal entry point.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
753 Compile the regular expression PATTERN, whose length is LENGTH.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
754 SYNTAX indicate regular expression's syntax. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
755
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
756 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
757 re_compile_internal (regex_t *preg, const char * pattern, size_t length,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
758 reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
759 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
760 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
761 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
762 re_string_t regexp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
763
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
764 /* Initialize the pattern buffer. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
765 preg->fastmap_accurate = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
766 preg->syntax = syntax;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
767 preg->not_bol = preg->not_eol = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
768 preg->used = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
769 preg->re_nsub = 0;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
770 preg->can_be_null = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
771 preg->regs_allocated = REGS_UNALLOCATED;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
772
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
773 /* Initialize the dfa. */
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
774 dfa = preg->buffer;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
775 if (BE (preg->allocated < sizeof (re_dfa_t), 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
776 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
777 /* If zero allocated, but buffer is non-null, try to realloc
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
778 enough space. This loses if buffer's address is bogus, but
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
779 that is the user's responsibility. If ->buffer is NULL this
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
780 is a simple allocation. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
781 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
782 if (dfa == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
783 return REG_ESPACE;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
784 preg->allocated = sizeof (re_dfa_t);
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
785 preg->buffer = dfa;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
786 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
787 preg->used = sizeof (re_dfa_t);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
788
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
789 err = init_dfa (dfa, length);
17408
020c917cba9d regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents: 17346
diff changeset
790 if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0))
020c917cba9d regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents: 17346
diff changeset
791 err = REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
792 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
793 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
794 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
795 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
796 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
797 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
798 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
799 #ifdef DEBUG
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
800 /* Note: length+1 will not overflow since it is checked in init_dfa. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
801 dfa->re_str = re_malloc (char, length + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
802 strncpy (dfa->re_str, pattern, length + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
803 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
804
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
805 err = re_string_construct (&regexp, pattern, length, preg->translate,
10078
f47c913858de Fix violation of <stdbool.h> replacement in regex.
Eric Blake <ebb9@byu.net>
parents: 10075
diff changeset
806 (syntax & RE_ICASE) != 0, dfa);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
807 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
808 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
809 re_compile_internal_free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
810 free_workarea_compile (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
811 re_string_destruct (&regexp);
17408
020c917cba9d regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents: 17346
diff changeset
812 lock_fini (dfa->lock);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
813 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
814 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
815 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
816 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
817 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
818
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
819 /* Parse the regular expression, and build a structure tree. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
820 preg->re_nsub = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
821 dfa->str_tree = parse (&regexp, preg, syntax, &err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
822 if (BE (dfa->str_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
823 goto re_compile_internal_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
824
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
825 /* Analyze the tree and create the nfa. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
826 err = analyze (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
827 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
828 goto re_compile_internal_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
829
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
830 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
831 /* If possible, do searching in single byte encoding to speed things up. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
832 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
833 optimize_utf8 (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
834 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
835
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
836 /* Then create the initial state of the dfa. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
837 err = create_initial_state (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
838
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
839 /* Release work areas. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
840 free_workarea_compile (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
841 re_string_destruct (&regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
842
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
843 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
844 {
17408
020c917cba9d regex: fix dfa race in multithreaded uses
Paul Eggert <eggert@cs.ucla.edu>
parents: 17346
diff changeset
845 lock_fini (dfa->lock);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
846 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
847 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
848 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
849 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
850
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
851 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
852 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
853
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
854 /* Initialize DFA. We use the length of the regular expression PAT_LEN
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
855 as the initial length of some arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
856
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
857 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
858 init_dfa (re_dfa_t *dfa, size_t pat_len)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
859 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
860 __re_size_t table_size;
12567
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
861 #ifndef _LIBC
16730
d4cc21bf38ab regex: pacify GCC when compiling GRUB
Paul Eggert <eggert@cs.ucla.edu>
parents: 16705
diff changeset
862 const char *codeset_name;
12567
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
863 #endif
6733
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
864 #ifdef RE_ENABLE_I18N
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
865 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
866 #else
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
867 size_t max_i18n_object_size = 0;
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
868 #endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
869 size_t max_object_size =
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
870 MAX (sizeof (struct re_state_table_entry),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
871 MAX (sizeof (re_token_t),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
872 MAX (sizeof (re_node_set),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
873 MAX (sizeof (regmatch_t),
6733
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
874 max_i18n_object_size))));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
875
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
876 memset (dfa, '\0', sizeof (re_dfa_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
877
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
878 /* Force allocation of str_tree_storage the first time. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
879 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
880
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
881 /* Avoid overflows. The extra "/ 2" is for the table_size doubling
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
882 calculation below, and for similar doubling calculations
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
883 elsewhere. And it's <= rather than <, because some of the
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
884 doubling calculations add 1 afterwards. */
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
885 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
886 return REG_ESPACE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
887
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
888 dfa->nodes_alloc = pat_len + 1;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
889 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
890
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
891 /* table_size = the smallest power of 2 greater than pat_len */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
892 for (table_size = 1; ; table_size <<= 1)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
893 if (table_size > pat_len)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
894 break;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
895
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
896 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
897 dfa->state_hash_mask = table_size - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
898
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
899 dfa->mb_cur_max = MB_CUR_MAX;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
900 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
901 if (dfa->mb_cur_max == 6
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
902 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
903 dfa->is_utf8 = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
904 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
905 != 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
906 #else
12567
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
907 codeset_name = nl_langinfo (CODESET);
16950
87796549f866 regex: use locale-independent comparison for codeset name
Paul Eggert <eggert@cs.ucla.edu>
parents: 16912
diff changeset
908 if ((codeset_name[0] == 'U' || codeset_name[0] == 'u')
87796549f866 regex: use locale-independent comparison for codeset name
Paul Eggert <eggert@cs.ucla.edu>
parents: 16912
diff changeset
909 && (codeset_name[1] == 'T' || codeset_name[1] == 't')
87796549f866 regex: use locale-independent comparison for codeset name
Paul Eggert <eggert@cs.ucla.edu>
parents: 16912
diff changeset
910 && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
87796549f866 regex: use locale-independent comparison for codeset name
Paul Eggert <eggert@cs.ucla.edu>
parents: 16912
diff changeset
911 && strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
912 dfa->is_utf8 = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
913
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
914 /* We check exhaustively in the loop below if this charset is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
915 superset of ASCII. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
916 dfa->map_notascii = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
917 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
918
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
919 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
920 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
921 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
922 if (dfa->is_utf8)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
923 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
924 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
925 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
926 int i, j, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
927
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
928 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
929 if (BE (dfa->sb_char == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
930 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
931
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
932 /* Set the bits corresponding to single byte chars. */
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
933 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
934 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
935 {
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
936 wint_t wch = __btowc (ch);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
937 if (wch != WEOF)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
938 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
939 # ifndef _LIBC
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
940 if (isascii (ch) && wch != ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
941 dfa->map_notascii = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
942 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
943 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
944 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
945 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
946 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
947
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
948 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
949 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
950 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
951 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
952
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
953 /* Initialize WORD_CHAR table, which indicates which character is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
954 "word". In this case "word" means that it is the word construction
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
955 character used by some operators like "\<", "\>", etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
956
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
957 static void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
958 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
959 init_word_char (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
960 {
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
961 int i = 0;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
962 int j;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
963 int ch = 0;
17234
de636633c6cd regex: port to C89
Paul Eggert <eggert@cs.ucla.edu>
parents: 17233
diff changeset
964 dfa->word_ops_used = 1;
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
965 if (BE (dfa->map_notascii == 0, 1))
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
966 {
16882
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
967 bitset_word_t bits0 = 0x00000000;
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
968 bitset_word_t bits1 = 0x03ff0000;
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
969 bitset_word_t bits2 = 0x87fffffe;
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
970 bitset_word_t bits3 = 0x07fffffe;
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
971 if (BITSET_WORD_BITS == 64)
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
972 {
16882
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
973 dfa->word_char[0] = bits1 << 31 << 1 | bits0;
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
974 dfa->word_char[1] = bits3 << 31 << 1 | bits2;
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
975 i = 2;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
976 }
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
977 else if (BITSET_WORD_BITS == 32)
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
978 {
16882
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
979 dfa->word_char[0] = bits0;
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
980 dfa->word_char[1] = bits1;
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
981 dfa->word_char[2] = bits2;
551fb0402288 regex: don't assume uint64_t or uint32_t
Paul Eggert <eggert@cs.ucla.edu>
parents: 16770
diff changeset
982 dfa->word_char[3] = bits3;
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
983 i = 4;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
984 }
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
985 else
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
986 goto general_case;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
987 ch = 128;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
988
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
989 if (BE (dfa->is_utf8, 1))
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
990 {
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
991 memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
992 return;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
993 }
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
994 }
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
995
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
996 general_case:
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
997 for (; i < BITSET_WORDS; ++i)
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
998 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
999 if (isalnum (ch) || ch == '_')
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1000 dfa->word_char[i] |= (bitset_word_t) 1 << j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1001 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1002
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1003 /* Free the work areas which are only used while compiling. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1004
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1005 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1006 free_workarea_compile (regex_t *preg)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1007 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
1008 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1009 bin_tree_storage_t *storage, *next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1010 for (storage = dfa->str_tree_storage; storage; storage = next)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1011 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1012 next = storage->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1013 re_free (storage);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1014 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1015 dfa->str_tree_storage = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1016 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1017 dfa->str_tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1018 re_free (dfa->org_indices);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1019 dfa->org_indices = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1020 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1021
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1022 /* Create initial states for all contexts. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1023
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1024 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1025 create_initial_state (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1026 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1027 Idx first, i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1028 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1029 re_node_set init_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1030
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1031 /* Initial states have the epsilon closure of the node which is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1032 the first node of the regular expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1033 first = dfa->str_tree->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1034 dfa->init_node = first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1035 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1036 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1037 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1038
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1039 /* The back-references which are in initial states can epsilon transit,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1040 since in this case all of the subexpressions can be null.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1041 Then we add epsilon closures of the nodes which are the next nodes of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1042 the back-references. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1043 if (dfa->nbackref > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1044 for (i = 0; i < init_nodes.nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1045 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1046 Idx node_idx = init_nodes.elems[i];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1047 re_token_type_t type = dfa->nodes[node_idx].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1048
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1049 Idx clexp_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1050 if (type != OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1051 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1052 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1053 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1054 re_token_t *clexp_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1055 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1056 if (clexp_node->type == OP_CLOSE_SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1057 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1058 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1059 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1060 if (clexp_idx == init_nodes.nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1061 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1062
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1063 if (type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1064 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1065 Idx dest_idx = dfa->edests[node_idx].elems[0];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1066 if (!re_node_set_contains (&init_nodes, dest_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1067 {
12847
64dad3a0ba71 regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents: 12831
diff changeset
1068 reg_errcode_t merge_err
64dad3a0ba71 regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents: 12831
diff changeset
1069 = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
64dad3a0ba71 regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents: 12831
diff changeset
1070 if (merge_err != REG_NOERROR)
64dad3a0ba71 regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents: 12831
diff changeset
1071 return merge_err;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1072 i = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1073 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1074 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1075 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1076
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1077 /* It must be the first time to invoke acquire_state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1078 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1079 /* We don't check ERR here, since the initial state must not be NULL. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1080 if (BE (dfa->init_state == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1081 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1082 if (dfa->init_state->has_constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1083 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1084 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1085 CONTEXT_WORD);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1086 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1087 CONTEXT_NEWLINE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1088 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1089 &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1090 CONTEXT_NEWLINE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1091 | CONTEXT_BEGBUF);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1092 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1093 || dfa->init_state_begbuf == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1094 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1095 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1096 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1097 dfa->init_state_word = dfa->init_state_nl
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1098 = dfa->init_state_begbuf = dfa->init_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1099
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1100 re_node_set_free (&init_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1101 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1102 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1103
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1104 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1105 /* If it is possible to do searching in single byte encoding instead of UTF-8
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1106 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1107 DFA nodes where needed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1108
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1109 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1110 optimize_utf8 (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1111 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1112 Idx node;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1113 int i;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1114 bool mb_chars = false;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1115 bool has_period = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1116
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1117 for (node = 0; node < dfa->nodes_len; ++node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1118 switch (dfa->nodes[node].type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1119 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1120 case CHARACTER:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1121 if (dfa->nodes[node].opr.c >= ASCII_CHARS)
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1122 mb_chars = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1123 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1124 case ANCHOR:
9494
7cd817e07a16 Fix a 4-year-old used-uninitialized bug in regcomp.c.
Jim Meyering <meyering@redhat.com>
parents: 8153
diff changeset
1125 switch (dfa->nodes[node].opr.ctx_type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1126 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1127 case LINE_FIRST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1128 case LINE_LAST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1129 case BUF_FIRST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1130 case BUF_LAST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1131 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1132 default:
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1133 /* Word anchors etc. cannot be handled. It's okay to test
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1134 opr.ctx_type since constraints (for all DFA nodes) are
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1135 created by ORing one or more opr.ctx_type values. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1136 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1137 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1138 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1139 case OP_PERIOD:
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1140 has_period = true;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1141 break;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1142 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1143 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1144 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1145 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1146 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1147 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1148 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1149 case COMPLEX_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1150 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1151 case SIMPLE_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1152 /* Just double check. */
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1153 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1154 int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1155 ? 0
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1156 : BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1157 for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1158 {
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1159 if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1160 return;
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1161 rshift = 0;
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1162 }
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1163 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1164 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1165 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1166 abort ();
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1167 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1168
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1169 if (mb_chars || has_period)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1170 for (node = 0; node < dfa->nodes_len; ++node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1171 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1172 if (dfa->nodes[node].type == CHARACTER
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1173 && dfa->nodes[node].opr.c >= ASCII_CHARS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1174 dfa->nodes[node].mb_partial = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1175 else if (dfa->nodes[node].type == OP_PERIOD)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1176 dfa->nodes[node].type = OP_UTF8_PERIOD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1177 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1178
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1179 /* The search can be in single byte locale. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1180 dfa->mb_cur_max = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1181 dfa->is_utf8 = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1182 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1183 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1184 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1185
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1186 /* Analyze the structure tree, and calculate "first", "next", "edest",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1187 "eclosure", and "inveclosure". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1188
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1189 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1190 analyze (regex_t *preg)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1191 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
1192 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1193 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1194
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1195 /* Allocate arrays. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1196 dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1197 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1198 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1199 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1200 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1201 || dfa->eclosures == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1202 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1203
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1204 dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1205 if (dfa->subexp_map != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1206 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1207 Idx i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1208 for (i = 0; i < preg->re_nsub; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1209 dfa->subexp_map[i] = i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1210 preorder (dfa->str_tree, optimize_subexps, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1211 for (i = 0; i < preg->re_nsub; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1212 if (dfa->subexp_map[i] != i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1213 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1214 if (i == preg->re_nsub)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1215 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1216 free (dfa->subexp_map);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1217 dfa->subexp_map = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1218 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1219 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1220
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1221 ret = postorder (dfa->str_tree, lower_subexps, preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1222 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1223 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1224 ret = postorder (dfa->str_tree, calc_first, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1225 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1226 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1227 preorder (dfa->str_tree, calc_next, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1228 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1229 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1230 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1231 ret = calc_eclosure (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1232 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1233 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1234
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1235 /* We only need this during the prune_impossible_nodes pass in regexec.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1236 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1237 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1238 || dfa->nbackref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1239 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1240 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1241 if (BE (dfa->inveclosures == NULL, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1242 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1243 ret = calc_inveclosure (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1244 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1245
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1246 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1247 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1248
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1249 /* Our parse trees are very unbalanced, so we cannot use a stack to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1250 implement parse tree visits. Instead, we use parent pointers and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1251 some hairy code in these two functions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1252 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1253 postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1254 void *extra)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1255 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1256 bin_tree_t *node, *prev;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1257
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1258 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1259 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1260 /* Descend down the tree, preferably to the left (or to the right
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1261 if that's the only child). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1262 while (node->left || node->right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1263 if (node->left)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1264 node = node->left;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1265 else
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1266 node = node->right;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1267
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1268 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1269 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1270 reg_errcode_t err = fn (extra, node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1271 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1272 return err;
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1273 if (node->parent == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1274 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1275 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1276 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1277 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1278 /* Go up while we have a node that is reached from the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1279 while (node->right == prev || node->right == NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1280 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1281 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1282 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1283
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1284 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1285 preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1286 void *extra)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1287 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1288 bin_tree_t *node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1289
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1290 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1291 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1292 reg_errcode_t err = fn (extra, node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1293 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1294 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1295
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1296 /* Go to the left node, or up and to the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1297 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1298 node = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1299 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1300 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1301 bin_tree_t *prev = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1302 while (node->right == prev || node->right == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1303 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1304 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1305 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1306 if (!node)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1307 return REG_NOERROR;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1308 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1309 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1310 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1311 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1312 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1313
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1314 /* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1315 re_search_internal to map the inner one's opr.idx to this one's. Adjust
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1316 backreferences as well. Requires a preorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1317 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1318 optimize_subexps (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1319 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1320 re_dfa_t *dfa = (re_dfa_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1321
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1322 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1323 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1324 int idx = node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1325 node->token.opr.idx = dfa->subexp_map[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1326 dfa->used_bkref_map |= 1 << node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1327 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1328
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1329 else if (node->token.type == SUBEXP
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1330 && node->left && node->left->token.type == SUBEXP)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1331 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1332 Idx other_idx = node->left->token.opr.idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1333
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1334 node->left = node->left->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1335 if (node->left)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1336 node->left->parent = node;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1337
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1338 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1339 if (other_idx < BITSET_WORD_BITS)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1340 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1341 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1342
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1343 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1344 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1345
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1346 /* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1347 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1348 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1349 lower_subexps (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1350 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1351 regex_t *preg = (regex_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1352 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1353
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1354 if (node->left && node->left->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1355 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1356 node->left = lower_subexp (&err, preg, node->left);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1357 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1358 node->left->parent = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1359 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1360 if (node->right && node->right->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1361 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1362 node->right = lower_subexp (&err, preg, node->right);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1363 if (node->right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1364 node->right->parent = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1365 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1366
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1367 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1368 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1369
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1370 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1371 lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1372 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
1373 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1374 bin_tree_t *body = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1375 bin_tree_t *op, *cls, *tree1, *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1376
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1377 if (preg->no_sub
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1378 /* We do not optimize empty subexpressions, because otherwise we may
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1379 have bad CONCAT nodes with NULL children. This is obviously not
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1380 very common, so we do not lose much. An example that triggers
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1381 this case is the sed "script" /\(\)/x. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1382 && node->left != NULL
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1383 && (node->token.opr.idx >= BITSET_WORD_BITS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1384 || !(dfa->used_bkref_map
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1385 & ((bitset_word_t) 1 << node->token.opr.idx))))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1386 return node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1387
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1388 /* Convert the SUBEXP node to the concatenation of an
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1389 OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1390 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1391 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1392 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1393 tree = create_tree (dfa, op, tree1, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1394 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1395 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1396 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1397 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1398 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1399
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1400 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1401 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1402 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1403 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1404
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1405 /* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1406 nodes. Requires a postorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1407 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1408 calc_first (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1409 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1410 re_dfa_t *dfa = (re_dfa_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1411 if (node->token.type == CONCAT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1412 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1413 node->first = node->left->first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1414 node->node_idx = node->left->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1415 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1416 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1417 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1418 node->first = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1419 node->node_idx = re_dfa_add_node (dfa, node->token);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1420 if (BE (node->node_idx == REG_MISSING, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1421 return REG_ESPACE;
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1422 if (node->token.type == ANCHOR)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1423 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1424 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1425 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1426 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1427
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1428 /* Pass 2: compute NEXT on the tree. Preorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1429 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1430 calc_next (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1431 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1432 switch (node->token.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1433 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1434 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1435 node->left->next = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1436 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1437 case CONCAT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1438 node->left->next = node->right->first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1439 node->right->next = node->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1440 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1441 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1442 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1443 node->left->next = node->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1444 if (node->right)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1445 node->right->next = node->next;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1446 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1447 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1448 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1449 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1450
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1451 /* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1452 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1453 link_nfa_nodes (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1454 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1455 re_dfa_t *dfa = (re_dfa_t *) extra;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1456 Idx idx = node->node_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1457 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1458
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1459 switch (node->token.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1460 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1461 case CONCAT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1462 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1463
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1464 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1465 assert (node->next == NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1466 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1467
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1468 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1469 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1470 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1471 Idx left, right;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1472 dfa->has_plural_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1473 if (node->left != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1474 left = node->left->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1475 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1476 left = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1477 if (node->right != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1478 right = node->right->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1479 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1480 right = node->next->node_idx;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1481 assert (REG_VALID_INDEX (left));
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1482 assert (REG_VALID_INDEX (right));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1483 err = re_node_set_init_2 (dfa->edests + idx, left, right);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1484 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1485 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1486
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1487 case ANCHOR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1488 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1489 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1490 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1491 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1492
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1493 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1494 dfa->nexts[idx] = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1495 if (node->token.type == OP_BACK_REF)
12829
971957a253f8 regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents: 12572
diff changeset
1496 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1497 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1498
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1499 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1500 assert (!IS_EPSILON_NODE (node->token.type));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1501 dfa->nexts[idx] = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1502 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1503 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1504
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1505 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1506 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1507
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1508 /* Duplicate the epsilon closure of the node ROOT_NODE.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1509 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1510 to their own constraint. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1511
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1512 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1513 internal_function
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1514 duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1515 Idx root_node, unsigned int init_constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1516 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1517 Idx org_node, clone_node;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1518 bool ok;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1519 unsigned int constraint = init_constraint;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1520 for (org_node = top_org_node, clone_node = top_clone_node;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1521 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1522 Idx org_dest, clone_dest;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1523 if (dfa->nodes[org_node].type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1524 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1525 /* If the back reference epsilon-transit, its destination must
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1526 also have the constraint. Then duplicate the epsilon closure
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1527 of the destination of the back reference, and store it in
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1528 edests of the back reference. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1529 org_dest = dfa->nexts[org_node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1530 re_node_set_empty (dfa->edests + clone_node);
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1531 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1532 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1533 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1534 dfa->nexts[clone_node] = dfa->nexts[org_node];
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1535 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1536 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1537 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1538 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1539 else if (dfa->edests[org_node].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1540 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1541 /* In case of the node can't epsilon-transit, don't duplicate the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1542 destination and store the original destination as the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1543 destination of the node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1544 dfa->nexts[clone_node] = dfa->nexts[org_node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1545 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1546 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1547 else if (dfa->edests[org_node].nelem == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1548 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1549 /* In case of the node can epsilon-transit, and it has only one
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1550 destination. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1551 org_dest = dfa->edests[org_node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1552 re_node_set_empty (dfa->edests + clone_node);
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1553 /* If the node is root_node itself, it means the epsilon closure
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1554 has a loop. Then tie it to the destination of the root_node. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1555 if (org_node == root_node && clone_node != org_node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1556 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1557 ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1558 if (BE (! ok, 0))
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1559 return REG_ESPACE;
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1560 break;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1561 }
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1562 /* In case the node has another constraint, append it. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1563 constraint |= dfa->nodes[org_node].constraint;
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1564 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1565 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1566 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1567 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1568 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1569 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1570 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1571 else /* dfa->edests[org_node].nelem == 2 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1572 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1573 /* In case of the node can epsilon-transit, and it has two
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1574 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1575 org_dest = dfa->edests[org_node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1576 re_node_set_empty (dfa->edests + clone_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1577 /* Search for a duplicated node which satisfies the constraint. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1578 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1579 if (clone_dest == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1580 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1581 /* There is no such duplicated node, create a new one. */
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1582 reg_errcode_t err;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1583 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1584 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1585 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1586 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1587 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1588 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1589 err = duplicate_node_closure (dfa, org_dest, clone_dest,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1590 root_node, constraint);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1591 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1592 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1593 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1594 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1595 {
12569
51ea446bf1f8 regcomp: fix typo in comment
Jim Meyering <meyering@redhat.com>
parents: 12568
diff changeset
1596 /* There is a duplicated node which satisfies the constraint,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1597 use it to avoid infinite loop. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1598 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1599 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1600 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1601 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1602
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1603 org_dest = dfa->edests[org_node].elems[1];
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1604 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1605 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1606 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1607 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1608 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1609 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1610 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1611 org_node = org_dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1612 clone_node = clone_dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1613 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1614 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1615 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1616
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1617 /* Search for a node which is duplicated from the node ORG_NODE, and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1618 satisfies the constraint CONSTRAINT. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1619
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1620 static Idx
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
1621 search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1622 unsigned int constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1623 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1624 Idx idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1625 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1626 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1627 if (org_node == dfa->org_indices[idx]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1628 && constraint == dfa->nodes[idx].constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1629 return idx; /* Found. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1630 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1631 return REG_MISSING; /* Not found. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1632 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1633
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1634 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1635 Return the index of the new node, or REG_MISSING if insufficient storage is
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1636 available. */
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1637
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1638 static Idx
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1639 duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1640 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1641 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1642 if (BE (dup_idx != REG_MISSING, 1))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1643 {
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1644 dfa->nodes[dup_idx].constraint = constraint;
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1645 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1646 dfa->nodes[dup_idx].duplicated = 1;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1647
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1648 /* Store the index of the original node. */
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1649 dfa->org_indices[dup_idx] = org_idx;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1650 }
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1651 return dup_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1652 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1653
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1654 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1655 calc_inveclosure (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1656 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1657 Idx src, idx;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1658 bool ok;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1659 for (idx = 0; idx < dfa->nodes_len; ++idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1660 re_node_set_init_empty (dfa->inveclosures + idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1661
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1662 for (src = 0; src < dfa->nodes_len; ++src)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1663 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1664 Idx *elems = dfa->eclosures[src].elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1665 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1666 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1667 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1668 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1669 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1670 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1671 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1672
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1673 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1674 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1675
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1676 /* Calculate "eclosure" for all the node in DFA. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1677
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1678 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1679 calc_eclosure (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1680 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1681 Idx node_idx;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1682 bool incomplete;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1683 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1684 assert (dfa->nodes_len > 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1685 #endif
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1686 incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1687 /* For each nodes, calculate epsilon closure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1688 for (node_idx = 0; ; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1689 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1690 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1691 re_node_set eclosure_elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1692 if (node_idx == dfa->nodes_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1693 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1694 if (!incomplete)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1695 break;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1696 incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1697 node_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1698 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1699
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1700 #ifdef DEBUG
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1701 assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1702 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1703
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1704 /* If we have already calculated, skip it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1705 if (dfa->eclosures[node_idx].nelem != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1706 continue;
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1707 /* Calculate epsilon closure of 'node_idx'. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1708 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1709 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1710 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1711
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1712 if (dfa->eclosures[node_idx].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1713 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1714 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1715 re_node_set_free (&eclosure_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1716 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1717 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1718 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1719 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1720
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1721 /* Calculate epsilon closure of NODE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1722
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1723 static reg_errcode_t
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1724 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1725 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1726 reg_errcode_t err;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1727 Idx i;
12831
00cfc5186819 regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents: 12830
diff changeset
1728 re_node_set eclosure;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1729 bool ok;
12831
00cfc5186819 regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents: 12830
diff changeset
1730 bool incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1731 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1732 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1733 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1734
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1735 /* This indicates that we are calculating this node now.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1736 We reference this value to avoid infinite loop. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1737 dfa->eclosures[node].nelem = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1738
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1739 /* If the current node has constraints, duplicate all nodes
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1740 since they must inherit the constraints. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1741 if (dfa->nodes[node].constraint
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1742 && dfa->edests[node].nelem
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1743 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1744 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1745 err = duplicate_node_closure (dfa, node, node, node,
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1746 dfa->nodes[node].constraint);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1747 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1748 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1749 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1750
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1751 /* Expand each epsilon destination nodes. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1752 if (IS_EPSILON_NODE(dfa->nodes[node].type))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1753 for (i = 0; i < dfa->edests[node].nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1754 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1755 re_node_set eclosure_elem;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1756 Idx edest = dfa->edests[node].elems[i];
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1757 /* If calculating the epsilon closure of 'edest' is in progress,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1758 return intermediate result. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1759 if (dfa->eclosures[edest].nelem == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1760 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1761 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1762 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1763 }
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1764 /* If we haven't calculated the epsilon closure of 'edest' yet,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1765 calculate now. Otherwise use calculated epsilon closure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1766 if (dfa->eclosures[edest].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1767 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1768 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1769 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1770 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1771 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1772 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1773 eclosure_elem = dfa->eclosures[edest];
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1774 /* Merge the epsilon closure of 'edest'. */
12829
971957a253f8 regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents: 12572
diff changeset
1775 err = re_node_set_merge (&eclosure, &eclosure_elem);
971957a253f8 regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents: 12572
diff changeset
1776 if (BE (err != REG_NOERROR, 0))
971957a253f8 regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents: 12572
diff changeset
1777 return err;
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1778 /* If the epsilon closure of 'edest' is incomplete,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1779 the epsilon closure of this node is also incomplete. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1780 if (dfa->eclosures[edest].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1781 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1782 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1783 re_node_set_free (&eclosure_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1784 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1785 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1786
12831
00cfc5186819 regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents: 12830
diff changeset
1787 /* An epsilon closure includes itself. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1788 ok = re_node_set_insert (&eclosure, node);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1789 if (BE (! ok, 0))
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1790 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1791 if (incomplete && !root)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1792 dfa->eclosures[node].nelem = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1793 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1794 dfa->eclosures[node] = eclosure;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1795 *new_set = eclosure;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1796 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1797 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1798
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1799 /* Functions for token which are used in the parser. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1800
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1801 /* Fetch a token from INPUT.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1802 We must not use this function inside bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1803
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1804 static void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1805 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1806 fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1807 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1808 re_string_skip_bytes (input, peek_token (result, input, syntax));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1809 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1810
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1811 /* Peek a token from INPUT, and return the length of the token.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1812 We must not use this function inside bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1813
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1814 static int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1815 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1816 peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1817 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1818 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1819
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1820 if (re_string_eoi (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1821 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1822 token->type = END_OF_RE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1823 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1824 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1825
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1826 c = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1827 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1828
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1829 token->word_char = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1830 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1831 token->mb_partial = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1832 if (input->mb_cur_max > 1 &&
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1833 !re_string_first_byte (input, re_string_cur_idx (input)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1834 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1835 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1836 token->mb_partial = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1837 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1838 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1839 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1840 if (c == '\\')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1841 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1842 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1843 if (re_string_cur_idx (input) + 1 >= re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1844 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1845 token->type = BACK_SLASH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1846 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1847 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1848
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1849 c2 = re_string_peek_byte_case (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1850 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1851 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1852 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1853 if (input->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1854 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1855 wint_t wc = re_string_wchar_at (input,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1856 re_string_cur_idx (input) + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1857 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1858 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1859 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1860 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1861 token->word_char = IS_WORD_CHAR (c2) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1862
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1863 switch (c2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1864 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1865 case '|':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1866 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1867 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1868 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1869 case '1': case '2': case '3': case '4': case '5':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1870 case '6': case '7': case '8': case '9':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1871 if (!(syntax & RE_NO_BK_REFS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1872 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1873 token->type = OP_BACK_REF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1874 token->opr.idx = c2 - '1';
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1875 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1876 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1877 case '<':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1878 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1879 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1880 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1881 token->opr.ctx_type = WORD_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1882 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1883 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1884 case '>':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1885 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1886 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1887 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1888 token->opr.ctx_type = WORD_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1889 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1890 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1891 case 'b':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1892 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1893 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1894 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1895 token->opr.ctx_type = WORD_DELIM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1896 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1897 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1898 case 'B':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1899 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1900 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1901 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1902 token->opr.ctx_type = NOT_WORD_DELIM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1903 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1904 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1905 case 'w':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1906 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1907 token->type = OP_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1908 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1909 case 'W':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1910 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1911 token->type = OP_NOTWORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1912 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1913 case 's':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1914 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1915 token->type = OP_SPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1916 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1917 case 'S':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1918 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1919 token->type = OP_NOTSPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1920 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1921 case '`':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1922 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1923 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1924 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1925 token->opr.ctx_type = BUF_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1926 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1927 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1928 case '\'':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1929 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1930 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1931 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1932 token->opr.ctx_type = BUF_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1933 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1934 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1935 case '(':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1936 if (!(syntax & RE_NO_BK_PARENS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1937 token->type = OP_OPEN_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1938 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1939 case ')':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1940 if (!(syntax & RE_NO_BK_PARENS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1941 token->type = OP_CLOSE_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1942 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1943 case '+':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1944 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1945 token->type = OP_DUP_PLUS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1946 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1947 case '?':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1948 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1949 token->type = OP_DUP_QUESTION;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1950 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1951 case '{':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1952 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1953 token->type = OP_OPEN_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1954 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1955 case '}':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1956 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1957 token->type = OP_CLOSE_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1958 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1959 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1960 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1961 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1962 return 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1963 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1964
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1965 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1966 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1967 if (input->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1968 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1969 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1970 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1971 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1972 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1973 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1974 token->word_char = IS_WORD_CHAR (token->opr.c);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1975
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1976 switch (c)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1977 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1978 case '\n':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1979 if (syntax & RE_NEWLINE_ALT)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1980 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1981 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1982 case '|':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1983 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1984 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1985 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1986 case '*':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1987 token->type = OP_DUP_ASTERISK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1988 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1989 case '+':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1990 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1991 token->type = OP_DUP_PLUS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1992 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1993 case '?':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1994 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1995 token->type = OP_DUP_QUESTION;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1996 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1997 case '{':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1998 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1999 token->type = OP_OPEN_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2000 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2001 case '}':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2002 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2003 token->type = OP_CLOSE_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2004 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2005 case '(':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2006 if (syntax & RE_NO_BK_PARENS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2007 token->type = OP_OPEN_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2008 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2009 case ')':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2010 if (syntax & RE_NO_BK_PARENS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2011 token->type = OP_CLOSE_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2012 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2013 case '[':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2014 token->type = OP_OPEN_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2015 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2016 case '.':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2017 token->type = OP_PERIOD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2018 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2019 case '^':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2020 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2021 re_string_cur_idx (input) != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2022 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2023 char prev = re_string_peek_byte (input, -1);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2024 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2025 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2026 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2027 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2028 token->opr.ctx_type = LINE_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2029 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2030 case '$':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2031 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2032 re_string_cur_idx (input) + 1 != re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2033 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2034 re_token_t next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2035 re_string_skip_bytes (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2036 peek_token (&next, input, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2037 re_string_skip_bytes (input, -1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2038 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2039 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2040 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2041 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2042 token->opr.ctx_type = LINE_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2043 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2044 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2045 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2046 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2047 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2048 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2049
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2050 /* Peek a token from INPUT, and return the length of the token.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2051 This function must not be used outside of bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2052
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2053 static int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2054 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2055 peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2056 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2057 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2058 if (re_string_eoi (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2059 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2060 token->type = END_OF_RE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2061 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2062 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2063 c = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2064 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2065
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2066 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2067 if (input->mb_cur_max > 1 &&
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2068 !re_string_first_byte (input, re_string_cur_idx (input)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2069 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2070 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2071 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2072 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2073 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2074
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2075 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2076 && re_string_cur_idx (input) + 1 < re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2077 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2078 /* In this case, '\' escape a character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2079 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2080 re_string_skip_bytes (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2081 c2 = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2082 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2083 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2084 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2085 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2086 if (c == '[') /* '[' is a special char in a bracket exps. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2087 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2088 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2089 int token_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2090 if (re_string_cur_idx (input) + 1 < re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2091 c2 = re_string_peek_byte (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2092 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2093 c2 = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2094 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2095 token_len = 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2096 switch (c2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2097 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2098 case '.':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2099 token->type = OP_OPEN_COLL_ELEM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2100 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2101 case '=':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2102 token->type = OP_OPEN_EQUIV_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2103 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2104 case ':':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2105 if (syntax & RE_CHAR_CLASSES)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2106 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2107 token->type = OP_OPEN_CHAR_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2108 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2109 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2110 /* else fall through. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2111 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2112 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2113 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2114 token_len = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2115 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2116 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2117 return token_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2118 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2119 switch (c)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2120 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2121 case '-':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2122 token->type = OP_CHARSET_RANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2123 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2124 case ']':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2125 token->type = OP_CLOSE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2126 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2127 case '^':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2128 token->type = OP_NON_MATCH_LIST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2129 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2130 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2131 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2132 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2133 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2134 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2135
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2136 /* Functions for parser. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2137
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2138 /* Entry point of the parser.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2139 Parse the regular expression REGEXP and return the structure tree.
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
2140 If an error occurs, ERR is set by error code, and return NULL.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2141 This function build the following tree, from regular expression <reg_exp>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2142 CAT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2143 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2144 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2145 <reg_exp> EOR
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2146
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2147 CAT means concatenation.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2148 EOR means end of regular expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2149
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2150 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2151 parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2152 reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2153 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
2154 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2155 bin_tree_t *tree, *eor, *root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2156 re_token_t current_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2157 dfa->syntax = syntax;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2158 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2159 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2160 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2161 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2162 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2163 if (tree != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2164 root = create_tree (dfa, tree, eor, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2165 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2166 root = eor;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2167 if (BE (eor == NULL || root == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2168 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2169 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2170 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2171 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2172 return root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2173 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2174
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2175 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2176 <branch1>|<branch2>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2177 ALT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2178 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2179 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2180 <branch1> <branch2>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2181
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
2182 ALT means alternative, which represents the operator '|'. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2183
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2184 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2185 parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2186 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2187 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
2188 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2189 bin_tree_t *tree, *branch = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2190 tree = parse_branch (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2191 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2192 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2193
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2194 while (token->type == OP_ALT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2195 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2196 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2197 if (token->type != OP_ALT && token->type != END_OF_RE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2198 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2199 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2200 branch = parse_branch (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2201 if (BE (*err != REG_NOERROR && branch == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2202 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2203 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2204 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2205 branch = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2206 tree = create_tree (dfa, tree, branch, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2207 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2208 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2209 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2210 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2211 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2212 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2213 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2214 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2215
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2216 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2217 <exp1><exp2>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2218 CAT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2219 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2220 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2221 <exp1> <exp2>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2222
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2223 CAT means concatenation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2224
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2225 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2226 parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2227 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2228 {
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2229 bin_tree_t *tree, *expr;
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
2230 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2231 tree = parse_expression (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2232 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2233 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2234
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2235 while (token->type != OP_ALT && token->type != END_OF_RE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2236 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2237 {
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2238 expr = parse_expression (regexp, preg, token, syntax, nest, err);
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2239 if (BE (*err != REG_NOERROR && expr == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2240 {
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2241 if (tree != NULL)
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2242 postorder (tree, free_tree, NULL);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2243 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2244 }
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2245 if (tree != NULL && expr != NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2246 {
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2247 bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT);
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2248 if (newtree == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2249 {
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2250 postorder (expr, free_tree, NULL);
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2251 postorder (tree, free_tree, NULL);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2252 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2253 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2254 }
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2255 tree = newtree;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2256 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2257 else if (tree == NULL)
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2258 tree = expr;
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2259 /* Otherwise expr == NULL, we don't need to create new tree. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2260 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2261 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2262 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2263
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2264 /* This function builds the following tree, from regular expression a*:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2265 *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2266 |
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2267 a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2268 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2269
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2270 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2271 parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2272 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2273 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
2274 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2275 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2276 switch (token->type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2277 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2278 case CHARACTER:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2279 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2280 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2281 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2282 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2283 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2284 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2285 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2286 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2287 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2288 while (!re_string_eoi (regexp)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2289 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2290 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2291 bin_tree_t *mbc_remain;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2292 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2293 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2294 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2295 if (BE (mbc_remain == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2296 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2297 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2298 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2299 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2300 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2301 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2302 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2303 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2304 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2305 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2306 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2307 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2308 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2309 case OP_OPEN_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2310 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2311 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2312 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2313 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2314 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2315 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2316 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2317 *err = REG_ESUBREG;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2318 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2319 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2320 dfa->used_bkref_map |= 1 << token->opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2321 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2322 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2323 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2324 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2325 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2326 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2327 ++dfa->nbackref;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2328 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2329 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2330 case OP_OPEN_DUP_NUM:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2331 if (syntax & RE_CONTEXT_INVALID_DUP)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2332 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2333 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2334 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2335 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2336 /* FALLTHROUGH */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2337 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2338 case OP_DUP_PLUS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2339 case OP_DUP_QUESTION:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2340 if (syntax & RE_CONTEXT_INVALID_OPS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2341 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2342 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2343 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2344 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2345 else if (syntax & RE_CONTEXT_INDEP_OPS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2346 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2347 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2348 return parse_expression (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2349 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2350 /* else fall through */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2351 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2352 if ((token->type == OP_CLOSE_SUBEXP) &&
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2353 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2354 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2355 *err = REG_ERPAREN;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2356 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2357 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2358 /* else fall through */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2359 case OP_CLOSE_DUP_NUM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2360 /* We treat it as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2361
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2362 /* Then we can treat these characters as normal characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2363 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2364 /* mb_partial and word_char bits should be initialized already
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2365 by peek_token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2366 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2367 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2368 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2369 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2370 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2371 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2372 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2373 case ANCHOR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2374 if ((token->opr.ctx_type
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2375 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2376 && dfa->word_ops_used == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2377 init_word_char (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2378 if (token->opr.ctx_type == WORD_DELIM
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2379 || token->opr.ctx_type == NOT_WORD_DELIM)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2380 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2381 bin_tree_t *tree_first, *tree_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2382 if (token->opr.ctx_type == WORD_DELIM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2383 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2384 token->opr.ctx_type = WORD_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2385 tree_first = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2386 token->opr.ctx_type = WORD_LAST;
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2387 }
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2388 else
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2389 {
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2390 token->opr.ctx_type = INSIDE_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2391 tree_first = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2392 token->opr.ctx_type = INSIDE_NOTWORD;
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2393 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2394 tree_last = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2395 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2396 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2397 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2398 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2399 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2400 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2401 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2402 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2403 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2404 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2405 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2406 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2407 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2408 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2409 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2410 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2411 /* We must return here, since ANCHORs can't be followed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2412 by repetition operators.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2413 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2414 it must not be "<ANCHOR(^)><REPEAT(*)>". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2415 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2416 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2417 case OP_PERIOD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2418 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2419 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2420 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2421 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2422 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2423 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2424 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2425 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2426 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2427 case OP_WORD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2428 case OP_NOTWORD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2429 tree = build_charclass_op (dfa, regexp->trans,
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
2430 "alnum",
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
2431 "_",
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2432 token->type == OP_NOTWORD, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2433 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2434 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2435 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2436 case OP_SPACE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2437 case OP_NOTSPACE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2438 tree = build_charclass_op (dfa, regexp->trans,
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
2439 "space",
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
2440 "",
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2441 token->type == OP_NOTSPACE, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2442 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2443 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2444 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2445 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2446 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2447 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2448 case BACK_SLASH:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2449 *err = REG_EESCAPE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2450 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2451 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2452 /* Must not happen? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2453 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2454 assert (0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2455 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2456 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2457 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2458 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2459
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2460 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2461 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2462 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2463 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2464 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2465 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2466 /* In BRE consecutive duplications are not allowed. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2467 if ((syntax & RE_CONTEXT_INVALID_DUP)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2468 && (token->type == OP_DUP_ASTERISK
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2469 || token->type == OP_OPEN_DUP_NUM))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2470 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2471 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2472 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2473 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2474 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2475
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2476 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2477 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2478
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2479 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2480 (<reg_exp>):
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2481 SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2482 |
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2483 <reg_exp>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2484 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2485
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2486 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2487 parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2488 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2489 {
16770
e011e0a7ab5a regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents: 16730
diff changeset
2490 re_dfa_t *dfa = preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2491 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2492 size_t cur_nsub;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2493 cur_nsub = preg->re_nsub++;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2494
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2495 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2496
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2497 /* The subexpression may be a null string. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2498 if (token->type == OP_CLOSE_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2499 tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2500 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2501 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2502 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2503 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2504 {
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2505 if (tree != NULL)
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2506 postorder (tree, free_tree, NULL);
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2507 *err = REG_EPAREN;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2508 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2509 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2510 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2511 }
6171
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2512
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2513 if (cur_nsub <= '9' - '1')
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2514 dfa->completed_bkref_map |= 1 << cur_nsub;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2515
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2516 tree = create_tree (dfa, tree, NULL, SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2517 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2518 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2519 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2520 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2521 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2522 tree->token.opr.idx = cur_nsub;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2523 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2524 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2525
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2526 /* This function parse repetition operators like "*", "+", "{1,3}" etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2527
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2528 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2529 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2530 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2531 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2532 bin_tree_t *tree = NULL, *old_tree = NULL;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2533 Idx i, start, end, start_idx = re_string_cur_idx (regexp);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2534 re_token_t start_token = *token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2535
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2536 if (token->type == OP_OPEN_DUP_NUM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2537 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2538 end = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2539 start = fetch_number (regexp, token, syntax);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2540 if (start == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2541 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2542 if (token->type == CHARACTER && token->opr.c == ',')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2543 start = 0; /* We treat "{,m}" as "{0,m}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2544 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2545 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2546 *err = REG_BADBR; /* <re>{} is invalid. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2547 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2548 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2549 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2550 if (BE (start != REG_ERROR, 1))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2551 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2552 /* We treat "{n}" as "{n,n}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2553 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2554 : ((token->type == CHARACTER && token->opr.c == ',')
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2555 ? fetch_number (regexp, token, syntax) : REG_ERROR));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2556 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2557 if (BE (start == REG_ERROR || end == REG_ERROR, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2558 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2559 /* Invalid sequence. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2560 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2561 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2562 if (token->type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2563 *err = REG_EBRACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2564 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2565 *err = REG_BADBR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2566
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2567 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2568 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2569
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2570 /* If the syntax bit is set, rollback. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2571 re_string_set_index (regexp, start_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2572 *token = start_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2573 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2574 /* mb_partial and word_char bits should be already initialized by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2575 peek_token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2576 return elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2577 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2578
12570
3ed9d84fee81 regcomp: recognize ill-formed { } expressions
Ulrich Drepper <drepper@redhat.com>
parents: 12569
diff changeset
2579 if (BE ((end != REG_MISSING && start > end)
3ed9d84fee81 regcomp: recognize ill-formed { } expressions
Ulrich Drepper <drepper@redhat.com>
parents: 12569
diff changeset
2580 || token->type != OP_CLOSE_DUP_NUM, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2581 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2582 /* First number greater than second. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2583 *err = REG_BADBR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2584 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2585 }
16705
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
2586
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
2587 if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0))
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
2588 {
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
2589 *err = REG_ESIZE;
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
2590 return NULL;
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
2591 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2592 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2593 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2594 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2595 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2596 end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2597 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2598
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2599 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2600
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2601 if (BE (elem == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2602 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2603 if (BE (start == 0 && end == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2604 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2605 postorder (elem, free_tree, NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2606 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2607 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2608
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2609 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2610 if (BE (start > 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2611 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2612 tree = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2613 for (i = 2; i <= start; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2614 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2615 elem = duplicate_tree (elem, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2616 tree = create_tree (dfa, tree, elem, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2617 if (BE (elem == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2618 goto parse_dup_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2619 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2620
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2621 if (start == end)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2622 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2623
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2624 /* Duplicate ELEM before it is marked optional. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2625 elem = duplicate_tree (elem, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2626 old_tree = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2627 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2628 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2629 old_tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2630
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2631 if (elem->token.type == SUBEXP)
16912
1591c84dbb2d regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents: 16882
diff changeset
2632 {
1591c84dbb2d regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents: 16882
diff changeset
2633 uintptr_t subidx = elem->token.opr.idx;
1591c84dbb2d regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents: 16882
diff changeset
2634 postorder (elem, mark_opt_subexp, (void *) subidx);
1591c84dbb2d regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents: 16882
diff changeset
2635 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2636
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2637 tree = create_tree (dfa, elem, NULL,
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2638 (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2639 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2640 goto parse_dup_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2641
12848
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2642 /* From gnulib's "intprops.h":
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2643 True if the arithmetic type T is signed. */
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2644 #define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2645
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2646 /* This loop is actually executed only when end != REG_MISSING,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2647 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2648 already created the start+1-th copy. */
12848
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2649 if (TYPE_SIGNED (Idx) || end != REG_MISSING)
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2650 for (i = start + 2; i <= end; ++i)
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2651 {
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2652 elem = duplicate_tree (elem, dfa);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2653 tree = create_tree (dfa, tree, elem, CONCAT);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2654 if (BE (elem == NULL || tree == NULL, 0))
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2655 goto parse_dup_op_espace;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2656
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2657 tree = create_tree (dfa, tree, NULL, OP_ALT);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2658 if (BE (tree == NULL, 0))
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2659 goto parse_dup_op_espace;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2660 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2661
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2662 if (old_tree)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2663 tree = create_tree (dfa, old_tree, tree, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2664
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2665 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2666
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2667 parse_dup_op_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2668 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2669 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2670 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2671
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2672 /* Size of the names for collating symbol/equivalence_class/character_class.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2673 I'm not sure, but maybe enough. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2674 #define BRACKET_NAME_BUF_SIZE 32
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2675
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2676 #ifndef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2677 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2678 Build the range expression which starts from START_ELEM, and ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2679 at END_ELEM. The results are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2680 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
2681 mbcset->range_ends, is a pointer argument since we may
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2682 update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2683
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2684 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2685 internal_function
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2686 # ifdef RE_ENABLE_I18N
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2687 build_range_exp (const reg_syntax_t syntax,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2688 bitset_t sbcset,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2689 re_charset_t *mbcset,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2690 Idx *range_alloc,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2691 const bracket_elem_t *start_elem,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2692 const bracket_elem_t *end_elem)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2693 # else /* not RE_ENABLE_I18N */
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2694 build_range_exp (const reg_syntax_t syntax,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2695 bitset_t sbcset,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2696 const bracket_elem_t *start_elem,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2697 const bracket_elem_t *end_elem)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2698 # endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2699 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2700 unsigned int start_ch, end_ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2701 /* Equivalence Classes and Character Classes can't be a range start/end. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2702 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2703 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2704 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2705 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2706
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2707 /* We can handle no multi character collating elements without libc
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2708 support. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2709 if (BE ((start_elem->type == COLL_SYM
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2710 && strlen ((char *) start_elem->opr.name) > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2711 || (end_elem->type == COLL_SYM
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2712 && strlen ((char *) end_elem->opr.name) > 1), 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2713 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2714
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2715 # ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2716 {
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
2717 wchar_t wc;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2718 wint_t start_wc;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2719 wint_t end_wc;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2720
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2721 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2722 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2723 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2724 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2725 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2726 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2727 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2728 ? __btowc (start_ch) : start_elem->opr.wch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2729 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2730 ? __btowc (end_ch) : end_elem->opr.wch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2731 if (start_wc == WEOF || end_wc == WEOF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2732 return REG_ECOLLATE;
17237
899138bc3a58 regex: implement rational ranges
Paul Eggert <eggert@cs.ucla.edu>
parents: 17234
diff changeset
2733 else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2734 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2735
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2736 /* Got valid collation sequence values, add them as a new entry.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2737 However, for !_LIBC we have no collation elements: if the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2738 character set is single byte, the single byte character set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2739 that we build below suffices. parse_bracket_exp passes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2740 no MBCSET if dfa->mb_cur_max == 1. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2741 if (mbcset)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2742 {
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2743 /* Check the space of the arrays. */
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2744 if (BE (*range_alloc == mbcset->nranges, 0))
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2745 {
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2746 /* There is not enough space, need realloc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2747 wchar_t *new_array_start, *new_array_end;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2748 Idx new_nranges;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2749
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2750 /* +1 in case of mbcset->nranges is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2751 new_nranges = 2 * mbcset->nranges + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2752 /* Use realloc since mbcset->range_starts and mbcset->range_ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2753 are NULL if *range_alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2754 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2755 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2756 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2757 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2758
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2759 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2760 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2761
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2762 mbcset->range_starts = new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2763 mbcset->range_ends = new_array_end;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2764 *range_alloc = new_nranges;
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2765 }
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2766
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2767 mbcset->range_starts[mbcset->nranges] = start_wc;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2768 mbcset->range_ends[mbcset->nranges++] = end_wc;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2769 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2770
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2771 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2772 for (wc = 0; wc < SBC_MAX; ++wc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2773 {
17237
899138bc3a58 regex: implement rational ranges
Paul Eggert <eggert@cs.ucla.edu>
parents: 17234
diff changeset
2774 if (start_wc <= wc && wc <= end_wc)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2775 bitset_set (sbcset, wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2776 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2777 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2778 # else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2779 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2780 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2781 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2782 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2783 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2784 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2785 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2786 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2787 if (start_ch > end_ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2788 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2789 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2790 for (ch = 0; ch < SBC_MAX; ++ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2791 if (start_ch <= ch && ch <= end_ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2792 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2793 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2794 # endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2795 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2796 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2797 #endif /* not _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2798
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2799 #ifndef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2800 /* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2801 Build the collating element which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2802 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2803 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2804 pointer argument since we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2805
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2806 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2807 internal_function
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2808 # ifdef RE_ENABLE_I18N
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2809 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2810 Idx *coll_sym_alloc, const unsigned char *name)
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2811 # else /* not RE_ENABLE_I18N */
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2812 build_collating_symbol (bitset_t sbcset, const unsigned char *name)
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
2813 # endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2814 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2815 size_t name_len = strlen ((const char *) name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2816 if (BE (name_len != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2817 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2818 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2819 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2820 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2821 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2822 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2823 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2824 #endif /* not _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2825
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2826 /* This function parses bracket expressions like "[abc]", "[a-c]",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2827 "[[.a-a.]]" etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2828
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2829 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2830 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2831 reg_syntax_t syntax, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2832 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2833 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2834 const unsigned char *collseqmb;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2835 const char *collseqwc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2836 uint32_t nrules;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2837 int32_t table_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2838 const int32_t *symb_table;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2839 const unsigned char *extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2840
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
2841 /* Local function for parse_bracket_exp used in _LIBC environment.
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
2842 Seek the collating symbol entry corresponding to NAME.
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2843 Return the index of the symbol in the SYMB_TABLE,
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2844 or -1 if not found. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2845
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2846 auto inline int32_t
17346
cd38818bce4e regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents: 17338
diff changeset
2847 __attribute__ ((always_inline))
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2848 seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2849 {
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2850 int32_t elem;
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2851
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2852 for (elem = 0; elem < table_size; elem++)
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2853 if (symb_table[2 * elem] != 0)
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2854 {
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2855 int32_t idx = symb_table[2 * elem + 1];
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2856 /* Skip the name of collating element name. */
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2857 idx += 1 + extra[idx];
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2858 if (/* Compare the length of the name. */
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2859 name_len == extra[idx]
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2860 /* Compare the name. */
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2861 && memcmp (name, &extra[idx + 1], name_len) == 0)
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2862 /* Yep, this is the entry. */
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2863 return elem;
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2864 }
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2865 return -1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2866 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2867
12571
64d47f001127 regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents: 12570
diff changeset
2868 /* Local function for parse_bracket_exp used in _LIBC environment.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2869 Look up the collation sequence value of BR_ELEM.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2870 Return the value if succeeded, UINT_MAX otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2871
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2872 auto inline unsigned int
17346
cd38818bce4e regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents: 17338
diff changeset
2873 __attribute__ ((always_inline))
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2874 lookup_collation_sequence_value (bracket_elem_t *br_elem)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2875 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2876 if (br_elem->type == SB_CHAR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2877 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2878 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2879 if (MB_CUR_MAX == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2880 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2881 if (nrules == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2882 return collseqmb[br_elem->opr.ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2883 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2884 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2885 wint_t wc = __btowc (br_elem->opr.ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2886 return __collseq_table_lookup (collseqwc, wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2887 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2888 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2889 else if (br_elem->type == MB_CHAR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2890 {
12571
64d47f001127 regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents: 12570
diff changeset
2891 if (nrules != 0)
64d47f001127 regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents: 12570
diff changeset
2892 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2893 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2894 else if (br_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2895 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2896 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2897 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2898 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2899 int32_t elem, idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2900 elem = seek_collating_symbol_entry (br_elem->opr.name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2901 sym_name_len);
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2902 if (elem != -1)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2903 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2904 /* We found the entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2905 idx = symb_table[2 * elem + 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2906 /* Skip the name of collating element name. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2907 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2908 /* Skip the byte sequence of the collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2909 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2910 /* Adjust for the alignment. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2911 idx = (idx + 3) & ~3;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2912 /* Skip the multibyte collation sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2913 idx += sizeof (unsigned int);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2914 /* Skip the wide char sequence of the collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2915 idx += sizeof (unsigned int) *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2916 (1 + *(unsigned int *) (extra + idx));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2917 /* Return the collation sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2918 return *(unsigned int *) (extra + idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2919 }
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2920 else if (sym_name_len == 1)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2921 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2922 /* No valid character. Match it as a single byte
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2923 character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2924 return collseqmb[br_elem->opr.name[0]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2925 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2926 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2927 else if (sym_name_len == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2928 return collseqmb[br_elem->opr.name[0]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2929 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2930 return UINT_MAX;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2931 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2932
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
2933 /* Local function for parse_bracket_exp used in _LIBC environment.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2934 Build the range expression which starts from START_ELEM, and ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2935 at END_ELEM. The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2936 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
2937 mbcset->range_ends, is a pointer argument since we may
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2938 update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2939
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2940 auto inline reg_errcode_t
17346
cd38818bce4e regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents: 17338
diff changeset
2941 __attribute__ ((always_inline))
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2942 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
2943 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2944 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2945 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2946 uint32_t start_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2947 uint32_t end_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2948
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2949 /* Equivalence Classes and Character Classes can't be a range
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2950 start/end. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2951 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2952 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2953 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2954 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2955
17237
899138bc3a58 regex: implement rational ranges
Paul Eggert <eggert@cs.ucla.edu>
parents: 17234
diff changeset
2956 /* FIXME: Implement rational ranges here, too. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2957 start_collseq = lookup_collation_sequence_value (start_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2958 end_collseq = lookup_collation_sequence_value (end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2959 /* Check start/end collation sequence values. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2960 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2961 return REG_ECOLLATE;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2962 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2963 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2964
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2965 /* Got valid collation sequence values, add them as a new entry.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2966 However, if we have no collation elements, and the character set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2967 is single byte, the single byte character set that we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2968 build below suffices. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2969 if (nrules > 0 || dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2970 {
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2971 /* Check the space of the arrays. */
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2972 if (BE (*range_alloc == mbcset->nranges, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2973 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2974 /* There is not enough space, need realloc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2975 uint32_t *new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2976 uint32_t *new_array_end;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2977 Idx new_nranges;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2978
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2979 /* +1 in case of mbcset->nranges is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2980 new_nranges = 2 * mbcset->nranges + 1;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2981 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2982 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2983 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2984 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2985
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2986 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2987 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2988
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2989 mbcset->range_starts = new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2990 mbcset->range_ends = new_array_end;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2991 *range_alloc = new_nranges;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2992 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2993
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2994 mbcset->range_starts[mbcset->nranges] = start_collseq;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2995 mbcset->range_ends[mbcset->nranges++] = end_collseq;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2996 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2997
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2998 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2999 for (ch = 0; ch < SBC_MAX; ch++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3000 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3001 uint32_t ch_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3002 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3003 if (MB_CUR_MAX == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3004 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3005 if (nrules == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3006 ch_collseq = collseqmb[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3007 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3008 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3009 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3010 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3011 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3012 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3013 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3014
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
3015 /* Local function for parse_bracket_exp used in _LIBC environment.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3016 Build the collating element which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3017 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3018 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
3019 pointer argument since we may update it. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3020
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3021 auto inline reg_errcode_t
17346
cd38818bce4e regex: rename remaining __attribute calls to __attribute__.
Gary V. Vaughan <gary@gnu.org>
parents: 17338
diff changeset
3022 __attribute__ ((always_inline))
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
3023 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
3024 Idx *coll_sym_alloc, const unsigned char *name)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3025 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3026 int32_t elem, idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3027 size_t name_len = strlen ((const char *) name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3028 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3029 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3030 elem = seek_collating_symbol_entry (name, name_len);
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
3031 if (elem != -1)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3032 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3033 /* We found the entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3034 idx = symb_table[2 * elem + 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3035 /* Skip the name of collating element name. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3036 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3037 }
17338
1fff19279ff9 regex: merge patches from libc
Paul Eggert <eggert@cs.ucla.edu>
parents: 17270
diff changeset
3038 else if (name_len == 1)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3039 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3040 /* No valid character, treat it as a normal
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3041 character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3042 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3043 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3044 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3045 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3046 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3047
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3048 /* Got valid collation sequence, add it as a new entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3049 /* Check the space of the arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3050 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3051 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3052 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3053 /* +1 in case of mbcset->ncoll_syms is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3054 Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3055 /* Use realloc since mbcset->coll_syms is NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3056 if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3057 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3058 new_coll_sym_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3059 if (BE (new_coll_syms == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3060 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3061 mbcset->coll_syms = new_coll_syms;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3062 *coll_sym_alloc = new_coll_sym_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3063 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3064 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3065 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3066 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3067 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3068 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3069 if (BE (name_len != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3070 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3071 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3072 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3073 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3074 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3075 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3076 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3077 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3078 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3079
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3080 re_token_t br_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3081 re_bitset_ptr_t sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3082 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3083 re_charset_t *mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3084 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3085 Idx equiv_class_alloc = 0, char_class_alloc = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3086 #endif /* RE_ENABLE_I18N */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3087 bool non_match = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3088 bin_tree_t *work_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3089 int token_len;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3090 bool first_round = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3091 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3092 collseqmb = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3093 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3094 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3095 if (nrules)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3096 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3097 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3098 if (MB_CUR_MAX > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3099 */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3100 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3101 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3102 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3103 _NL_COLLATE_SYMB_TABLEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3104 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3105 _NL_COLLATE_SYMB_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3106 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3107 #endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3108 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3109 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3110 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3111 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3112 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3113 if (BE (sbcset == NULL || mbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3114 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3115 if (BE (sbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3116 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3117 {
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
3118 re_free (sbcset);
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
3119 #ifdef RE_ENABLE_I18N
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
3120 re_free (mbcset);
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
3121 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3122 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3123 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3124 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3125
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3126 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3127 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3128 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3129 *err = REG_BADPAT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3130 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3131 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3132 if (token->type == OP_NON_MATCH_LIST)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3133 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3134 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3135 mbcset->non_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3136 #endif /* RE_ENABLE_I18N */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3137 non_match = true;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3138 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
8110
8428e492271f 2007-02-05 Paolo Bonzini <bonzini@gnu.org>
Paolo Bonzini <bonzini@gnu.org>
parents: 8073
diff changeset
3139 bitset_set (sbcset, '\n');
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3140 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3141 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3142 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3143 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3144 *err = REG_BADPAT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3145 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3146 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3147 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3148
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3149 /* We treat the first ']' as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3150 if (token->type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3151 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3152
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3153 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3154 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3155 bracket_elem_t start_elem, end_elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3156 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3157 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3158 reg_errcode_t ret;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3159 int token_len2 = 0;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3160 bool is_range_exp = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3161 re_token_t token2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3162
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3163 start_elem.opr.name = start_name_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3164 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3165 syntax, first_round);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3166 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3167 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3168 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3169 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3170 }
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3171 first_round = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3172
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3173 /* Get information about the next token. We need it in any case. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3174 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3175
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3176 /* Do not check for ranges if we know they are not allowed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3177 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3178 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3179 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3180 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3181 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3182 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3183 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3184 if (token->type == OP_CHARSET_RANGE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3185 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3186 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3187 token_len2 = peek_token_bracket (&token2, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3188 if (BE (token2.type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3189 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3190 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3191 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3192 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3193 if (token2.type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3194 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3195 /* We treat the last '-' as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3196 re_string_skip_bytes (regexp, -token_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3197 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3198 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3199 else
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3200 is_range_exp = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3201 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3202 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3203
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3204 if (is_range_exp == true)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3205 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3206 end_elem.opr.name = end_name_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3207 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3208 dfa, syntax, true);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3209 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3210 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3211 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3212 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3213 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3214
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3215 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3216
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3217 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3218 *err = build_range_exp (sbcset, mbcset, &range_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3219 &start_elem, &end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3220 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3221 # ifdef RE_ENABLE_I18N
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
3222 *err = build_range_exp (syntax, sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3223 dfa->mb_cur_max > 1 ? mbcset : NULL,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3224 &range_alloc, &start_elem, &end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3225 # else
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
3226 *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3227 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3228 #endif /* not _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3229 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3230 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3231 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3232 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3233 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3234 switch (start_elem.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3235 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3236 case SB_CHAR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3237 bitset_set (sbcset, start_elem.opr.ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3238 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3239 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3240 case MB_CHAR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3241 /* Check whether the array has enough space. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3242 if (BE (mbchar_alloc == mbcset->nmbchars, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3243 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3244 wchar_t *new_mbchars;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3245 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3246 /* +1 in case mbcset->nmbchars is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3247 mbchar_alloc = 2 * mbcset->nmbchars + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3248 /* Use realloc since array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3249 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3250 mbchar_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3251 if (BE (new_mbchars == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3252 goto parse_bracket_exp_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3253 mbcset->mbchars = new_mbchars;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3254 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3255 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3256 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3257 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3258 case EQUIV_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3259 *err = build_equiv_class (sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3260 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3261 mbcset, &equiv_class_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3262 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3263 start_elem.opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3264 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3265 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3266 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3267 case COLL_SYM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3268 *err = build_collating_symbol (sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3269 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3270 mbcset, &coll_sym_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3271 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3272 start_elem.opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3273 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3274 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3275 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3276 case CHAR_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3277 *err = build_charclass (regexp->trans, sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3278 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3279 mbcset, &char_class_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3280 #endif /* RE_ENABLE_I18N */
17270
7be3e941fb5b regex: conform to strict C
Paul Eggert <eggert@cs.ucla.edu>
parents: 17258
diff changeset
3281 (const char *) start_elem.opr.name,
7be3e941fb5b regex: conform to strict C
Paul Eggert <eggert@cs.ucla.edu>
parents: 17258
diff changeset
3282 syntax);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3283 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3284 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3285 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3286 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3287 assert (0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3288 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3289 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3290 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3291 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3292 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3293 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3294 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3295 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3296 if (token->type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3297 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3298 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3299
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3300 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3301
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3302 /* If it is a non-matching list. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3303 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3304 bitset_not (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3305
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3306 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3307 /* Ensure only single byte characters are set. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3308 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3309 bitset_mask (sbcset, dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3310
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3311 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3312 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3313 || mbcset->non_match)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3314 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3315 bin_tree_t *mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3316 int sbc_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3317 /* Build a tree for complex bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3318 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3319 br_token.type = COMPLEX_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3320 br_token.opr.mbcset = mbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3321 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3322 if (BE (mbc_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3323 goto parse_bracket_exp_espace;
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
3324 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3325 if (sbcset[sbc_idx])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3326 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3327 /* If there are no bits set in sbcset, there is no point
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3328 in having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
3329 if (sbc_idx < BITSET_WORDS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3330 {
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3331 /* Build a tree for simple bracket. */
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3332 br_token.type = SIMPLE_BRACKET;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3333 br_token.opr.sbcset = sbcset;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3334 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3335 if (BE (work_tree == NULL, 0))
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3336 goto parse_bracket_exp_espace;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3337
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3338 /* Then join them by ALT node. */
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3339 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3340 if (BE (work_tree == NULL, 0))
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3341 goto parse_bracket_exp_espace;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3342 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3343 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3344 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3345 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3346 work_tree = mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3347 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3348 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3349 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3350 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3351 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3352 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3353 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3354 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3355 /* Build a tree for simple bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3356 br_token.type = SIMPLE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3357 br_token.opr.sbcset = sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3358 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3359 if (BE (work_tree == NULL, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3360 goto parse_bracket_exp_espace;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3361 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3362 return work_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3363
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3364 parse_bracket_exp_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3365 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3366 parse_bracket_exp_free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3367 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3368 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3369 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3370 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3371 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3372 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3373
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3374 /* Parse an element in the bracket expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3375
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3376 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3377 parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3378 re_token_t *token, int token_len, re_dfa_t *dfa,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3379 reg_syntax_t syntax, bool accept_hyphen)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3380 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3381 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3382 int cur_char_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3383 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3384 if (cur_char_size > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3385 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3386 elem->type = MB_CHAR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3387 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3388 re_string_skip_bytes (regexp, cur_char_size);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3389 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3390 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3391 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3392 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3393 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3394 || token->type == OP_OPEN_EQUIV_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3395 return parse_bracket_symbol (elem, regexp, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3396 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3397 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3398 /* A '-' that is not a range indicator may appear only immediately
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3399 before the closing bracket. Anything else is an error. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3400 re_token_t token2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3401 (void) peek_token_bracket (&token2, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3402 if (token2.type != OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3403 /* The actual error value is not standardized since this whole
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3404 case is undefined. But ERANGE makes good sense. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3405 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3406 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3407 elem->type = SB_CHAR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3408 elem->opr.ch = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3409 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3410 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3411
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3412 /* Parse a bracket symbol in the bracket expression. Bracket symbols are
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3413 forms such as [:<character_class>:], [.<collating_element>.], and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3414 [=<equivalence_class>=]. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3415
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3416 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3417 parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3418 re_token_t *token)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3419 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3420 unsigned char ch, delim = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3421 int i = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3422 if (re_string_eoi(regexp))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3423 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3424 for (;; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3425 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3426 if (i >= BRACKET_NAME_BUF_SIZE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3427 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3428 if (token->type == OP_OPEN_CHAR_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3429 ch = re_string_fetch_byte_case (regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3430 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3431 ch = re_string_fetch_byte (regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3432 if (re_string_eoi(regexp))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3433 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3434 if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3435 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3436 elem->opr.name[i] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3437 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3438 re_string_skip_bytes (regexp, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3439 elem->opr.name[i] = '\0';
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3440 switch (token->type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3441 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3442 case OP_OPEN_COLL_ELEM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3443 elem->type = COLL_SYM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3444 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3445 case OP_OPEN_EQUIV_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3446 elem->type = EQUIV_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3447 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3448 case OP_OPEN_CHAR_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3449 elem->type = CHAR_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3450 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3451 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3452 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3453 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3454 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3455 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3456
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3457 /* Helper function for parse_bracket_exp.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3458 Build the equivalence class which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3459 The results are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3460 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
3461 passed by pointer since we may update it. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3462
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3463 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3464 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3465 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3466 Idx *equiv_class_alloc, const unsigned char *name)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3467 #else /* not RE_ENABLE_I18N */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3468 build_equiv_class (bitset_t sbcset, const unsigned char *name)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3469 #endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3470 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3471 #ifdef _LIBC
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3472 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3473 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3474 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3475 const int32_t *table, *indirect;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3476 const unsigned char *weights, *extra, *cp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3477 unsigned char char_buf[2];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3478 int32_t idx1, idx2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3479 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3480 size_t len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3481 /* This #include defines a local function! */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3482 # include <locale/weight.h>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3483 /* Calculate the index for equivalence class. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3484 cp = name;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3485 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3486 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3487 _NL_COLLATE_WEIGHTMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3488 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3489 _NL_COLLATE_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3490 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3491 _NL_COLLATE_INDIRECTMB);
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
3492 idx1 = findidx (&cp, -1);
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
3493 if (BE (idx1 == 0 || *cp != '\0', 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3494 /* This isn't a valid character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3495 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3496
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
3497 /* Build single byte matching table for this equivalence class. */
12572
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3498 len = weights[idx1 & 0xffffff];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3499 for (ch = 0; ch < SBC_MAX; ++ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3500 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3501 char_buf[0] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3502 cp = char_buf;
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
3503 idx2 = findidx (&cp, 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3504 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3505 idx2 = table[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3506 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3507 if (idx2 == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3508 /* This isn't a valid character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3509 continue;
12572
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3510 /* Compare only if the length matches and the collation rule
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3511 index is the same. */
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3512 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3513 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3514 int cnt = 0;
12572
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3515
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3516 while (cnt <= len &&
12572
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3517 weights[(idx1 & 0xffffff) + 1 + cnt]
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3518 == weights[(idx2 & 0xffffff) + 1 + cnt])
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3519 ++cnt;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3520
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3521 if (cnt > len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3522 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3523 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3524 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3525 /* Check whether the array has enough space. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3526 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3527 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3528 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3529 /* +1 in case mbcset->nequiv_classes is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3530 Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3531 /* Use realloc since the array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3532 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3533 int32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3534 new_equiv_class_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3535 if (BE (new_equiv_classes == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3536 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3537 mbcset->equiv_classes = new_equiv_classes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3538 *equiv_class_alloc = new_equiv_class_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3539 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3540 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3541 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3542 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3543 #endif /* _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3544 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3545 if (BE (strlen ((const char *) name) != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3546 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3547 bitset_set (sbcset, *name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3548 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3549 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3550 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3551
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3552 /* Helper function for parse_bracket_exp.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3553 Build the character class which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3554 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3555 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
3556 is a pointer argument since we may update it. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3557
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3558 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3559 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3560 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3561 re_charset_t *mbcset, Idx *char_class_alloc,
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
3562 const char *class_name, reg_syntax_t syntax)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3563 #else /* not RE_ENABLE_I18N */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3564 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
3565 const char *class_name, reg_syntax_t syntax)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3566 #endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3567 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3568 int i;
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
3569 const char *name = class_name;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3570
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3571 /* In case of REG_ICASE "upper" and "lower" match the both of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3572 upper and lower cases. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3573 if ((syntax & RE_ICASE)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3574 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3575 name = "alpha";
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3576
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3577 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3578 /* Check the space of the arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3579 if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3580 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3581 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3582 /* +1 in case of mbcset->nchar_classes is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3583 Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3584 /* Use realloc since array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3585 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3586 new_char_class_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3587 if (BE (new_char_classes == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3588 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3589 mbcset->char_classes = new_char_classes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3590 *char_class_alloc = new_char_class_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3591 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3592 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3593 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3594
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3595 #define BUILD_CHARCLASS_LOOP(ctype_func) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3596 do { \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3597 if (BE (trans != NULL, 0)) \
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3598 { \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3599 for (i = 0; i < SBC_MAX; ++i) \
6729
c5495b5c3f32 Fix space-tab problem. From Jim Meyering.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
3600 if (ctype_func (i)) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3601 bitset_set (sbcset, trans[i]); \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3602 } \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3603 else \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3604 { \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3605 for (i = 0; i < SBC_MAX; ++i) \
6729
c5495b5c3f32 Fix space-tab problem. From Jim Meyering.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
3606 if (ctype_func (i)) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3607 bitset_set (sbcset, i); \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3608 } \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3609 } while (0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3610
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3611 if (strcmp (name, "alnum") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3612 BUILD_CHARCLASS_LOOP (isalnum);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3613 else if (strcmp (name, "cntrl") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3614 BUILD_CHARCLASS_LOOP (iscntrl);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3615 else if (strcmp (name, "lower") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3616 BUILD_CHARCLASS_LOOP (islower);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3617 else if (strcmp (name, "space") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3618 BUILD_CHARCLASS_LOOP (isspace);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3619 else if (strcmp (name, "alpha") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3620 BUILD_CHARCLASS_LOOP (isalpha);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3621 else if (strcmp (name, "digit") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3622 BUILD_CHARCLASS_LOOP (isdigit);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3623 else if (strcmp (name, "print") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3624 BUILD_CHARCLASS_LOOP (isprint);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3625 else if (strcmp (name, "upper") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3626 BUILD_CHARCLASS_LOOP (isupper);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3627 else if (strcmp (name, "blank") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3628 BUILD_CHARCLASS_LOOP (isblank);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3629 else if (strcmp (name, "graph") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3630 BUILD_CHARCLASS_LOOP (isgraph);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3631 else if (strcmp (name, "punct") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3632 BUILD_CHARCLASS_LOOP (ispunct);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3633 else if (strcmp (name, "xdigit") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3634 BUILD_CHARCLASS_LOOP (isxdigit);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3635 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3636 return REG_ECTYPE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3637
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3638 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3639 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3640
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3641 static bin_tree_t *
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3642 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
17258
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
3643 const char *class_name,
28b073aabf32 regex: omit needless signed-pointer casts
Paul Eggert <eggert@cs.ucla.edu>
parents: 17249
diff changeset
3644 const char *extra, bool non_match,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3645 reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3646 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3647 re_bitset_ptr_t sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3648 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3649 re_charset_t *mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3650 Idx alloc = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3651 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3652 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3653 re_token_t br_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3654 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3655
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3656 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3657 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3658 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3659 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3660
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3661 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3662 if (BE (sbcset == NULL || mbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3663 #else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3664 if (BE (sbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3665 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3666 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3667 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3668 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3669 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3670
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3671 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3672 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3673 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3674 mbcset->non_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3675 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3676 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3677
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3678 /* We don't care the syntax in this case. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3679 ret = build_charclass (trans, sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3680 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3681 mbcset, &alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3682 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3683 class_name, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3684
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3685 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3686 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3687 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3688 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3689 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3690 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3691 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3692 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3693 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3694 /* \w match '_' also. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3695 for (; *extra; extra++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3696 bitset_set (sbcset, *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3697
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3698 /* If it is non-matching list. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3699 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3700 bitset_not (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3701
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3702 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3703 /* Ensure only single byte characters are set. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3704 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3705 bitset_mask (sbcset, dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3706 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3707
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3708 /* Build a tree for simple bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3709 br_token.type = SIMPLE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3710 br_token.opr.sbcset = sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3711 tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3712 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3713 goto build_word_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3714
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3715 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3716 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3717 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3718 bin_tree_t *mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3719 /* Build a tree for complex bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3720 br_token.type = COMPLEX_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3721 br_token.opr.mbcset = mbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3722 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3723 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3724 if (BE (mbc_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3725 goto build_word_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3726 /* Then join them by ALT node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3727 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3728 if (BE (mbc_tree != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3729 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3730 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3731 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3732 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3733 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3734 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3735 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3736 #else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3737 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3738 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3739
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3740 build_word_op_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3741 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3742 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3743 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3744 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3745 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3746 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3747 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3748
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3749 /* This is intended for expressions like "a{1,3}".
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
3750 Fetch a number from 'input', and return the number.
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3751 Return REG_MISSING if the number field is empty like "{,1}".
16705
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
3752 Return RE_DUP_MAX + 1 if the number field is too large.
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3753 Return REG_ERROR if an error occurred. */
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3754
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3755 static Idx
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3756 fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3757 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3758 Idx num = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3759 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3760 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3761 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3762 fetch_token (token, input, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3763 c = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3764 if (BE (token->type == END_OF_RE, 0))
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3765 return REG_ERROR;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3766 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3767 break;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3768 num = ((token->type != CHARACTER || c < '0' || '9' < c
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3769 || num == REG_ERROR)
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3770 ? REG_ERROR
16705
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
3771 : num == REG_MISSING
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
3772 ? c - '0'
54b750a813cb regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16366
diff changeset
3773 : MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3774 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3775 return num;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3776 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3777
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
#ifdef RE_ENABLE_I18N
/* Release the buffers referenced by CSET, then CSET itself.
   CSET must not be used after this call.  */
static void
free_charset (re_charset_t *cset)
{
  re_free (cset->mbchars);
# ifdef _LIBC
  /* These members are released only in _LIBC builds.  */
  re_free (cset->coll_syms);
  re_free (cset->equiv_classes);
  re_free (cset->range_starts);
  re_free (cset->range_ends);
# endif
  re_free (cset->char_classes);

  /* The descriptor goes last, once none of its members is needed.  */
  re_free (cset);
}
#endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3793
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3794 /* Functions for binary tree operation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3795
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3796 /* Create a tree node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3797
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3798 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3799 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3800 re_token_type_t type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3801 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3802 re_token_t t;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3803 t.type = type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3804 return create_token_tree (dfa, left, right, &t);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3805 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3806
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3807 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3808 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3809 const re_token_t *token)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3810 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3811 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3812 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3813 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3814 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3815
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3816 if (storage == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3817 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3818 storage->next = dfa->str_tree_storage;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3819 dfa->str_tree_storage = storage;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3820 dfa->str_tree_storage_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3821 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3822 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3823
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3824 tree->parent = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3825 tree->left = left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3826 tree->right = right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3827 tree->token = *token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3828 tree->token.duplicated = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3829 tree->token.opt_subexp = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3830 tree->first = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3831 tree->next = NULL;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3832 tree->node_idx = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3833
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3834 if (left != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3835 left->parent = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3836 if (right != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3837 right->parent = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3838 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3839 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3840
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3841 /* Mark the tree SRC as an optional subexpression.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3842 To be called from preorder or postorder. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3843
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3844 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3845 mark_opt_subexp (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3846 {
16912
1591c84dbb2d regex: avoid warning when pointers are not long
Paul Eggert <eggert@cs.ucla.edu>
parents: 16882
diff changeset
3847 Idx idx = (uintptr_t) extra;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3848 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3849 node->token.opt_subexp = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3850
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3851 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3852 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3853
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3854 /* Free the allocated memory inside NODE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3855
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3856 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3857 free_token (re_token_t *node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3858 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3859 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3860 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3861 free_charset (node->opr.mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3862 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3863 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3864 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3865 re_free (node->opr.sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3866 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3867
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3868 /* Worker function for tree walking. Free the allocated memory inside NODE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3869 and its children. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3870
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3871 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3872 free_tree (void *extra, bin_tree_t *node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3873 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3874 free_token (&node->token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3875 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3876 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3877
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3878
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3879 /* Duplicate the node SRC, and return new node. This is a preorder
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3880 visit similar to the one implemented by the generic visitor, but
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3881 we need more infrastructure to maintain two parallel trees --- so,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3882 it's easier to duplicate. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3883
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3884 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3885 duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3886 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3887 const bin_tree_t *node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3888 bin_tree_t *dup_root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3889 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3890
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3891 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3892 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3893 /* Create a new tree and link it back to the current parent. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3894 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3895 if (*p_new == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3896 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3897 (*p_new)->parent = dup_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3898 (*p_new)->token.duplicated = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3899 dup_node = *p_new;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3900
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3901 /* Go to the left node, or up and to the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3902 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3903 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3904 node = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3905 p_new = &dup_node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3906 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3907 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3908 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3909 const bin_tree_t *prev = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3910 while (node->right == prev || node->right == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3911 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3912 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3913 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3914 dup_node = dup_node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3915 if (!node)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3916 return dup_root;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3917 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3918 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3919 p_new = &dup_node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3920 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3921 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3922 }