annotate lib/regcomp.c @ 16235:18a38c9615f0

In commentary, do not use ` to quote.
author Paul Eggert <eggert@cs.ucla.edu>
date Thu, 05 Jan 2012 23:53:49 -0800
parents 8250f2777afc
children a712776b11ce
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1 /* Extended regular expression matching and search library.
16201
8250f2777afc maint: update all copyright year number ranges
Jim Meyering <meyering@redhat.com>
parents: 14079
diff changeset
2 Copyright (C) 2002-2012 Free Software Foundation, Inc.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3 This file is part of the GNU C Library.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
5
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
6 This program is free software; you can redistribute it and/or modify
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
8 the Free Software Foundation; either version 2, or (at your option)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
9 any later version.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
10
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
11 This program is distributed in the hope that it will be useful,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
14 GNU General Public License for more details.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
15
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
16 You should have received a copy of the GNU General Public License along
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
17 with this program; if not, write to the Free Software Foundation,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
19
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
20 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
21 size_t length, reg_syntax_t syntax);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
22 static void re_compile_fastmap_iter (regex_t *bufp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
23 const re_dfastate_t *init_state,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
24 char *fastmap);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
25 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
26 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
27 static void free_charset (re_charset_t *cset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
28 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
29 static void free_workarea_compile (regex_t *preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
30 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
31 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
32 static void optimize_utf8 (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
33 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
34 static reg_errcode_t analyze (regex_t *preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
35 static reg_errcode_t preorder (bin_tree_t *root,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
36 reg_errcode_t (fn (void *, bin_tree_t *)),
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
37 void *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
38 static reg_errcode_t postorder (bin_tree_t *root,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
39 reg_errcode_t (fn (void *, bin_tree_t *)),
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
40 void *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
41 static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
42 static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
43 static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
44 bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
45 static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
46 static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
47 static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
48 static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
49 static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
50 unsigned int constraint);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
51 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
52 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
53 Idx node, bool root);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
54 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
55 static Idx fetch_number (re_string_t *input, re_token_t *token,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
56 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
57 static int peek_token (re_token_t *token, re_string_t *input,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
58 reg_syntax_t syntax) internal_function;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
59 static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
60 reg_syntax_t syntax, reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
61 static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
62 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
63 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
64 static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
65 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
66 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
67 static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
68 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
69 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
70 static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
71 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
72 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
73 static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
74 re_dfa_t *dfa, re_token_t *token,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
75 reg_syntax_t syntax, reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
76 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
77 re_token_t *token, reg_syntax_t syntax,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
78 reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
79 static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
80 re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
81 re_token_t *token, int token_len,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
82 re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
83 reg_syntax_t syntax,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
84 bool accept_hyphen);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
85 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
86 re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
87 re_token_t *token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
88 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
89 static reg_errcode_t build_equiv_class (bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
90 re_charset_t *mbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
91 Idx *equiv_class_alloc,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
92 const unsigned char *name);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
93 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
94 bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
95 re_charset_t *mbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
96 Idx *char_class_alloc,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
97 const unsigned char *class_name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
98 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
99 #else /* not RE_ENABLE_I18N */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
100 static reg_errcode_t build_equiv_class (bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
101 const unsigned char *name);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
102 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
103 bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
104 const unsigned char *class_name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
105 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
106 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
107 static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
108 RE_TRANSLATE_TYPE trans,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
109 const unsigned char *class_name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
110 const unsigned char *extra,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
111 bool non_match, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
112 static bin_tree_t *create_tree (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
113 bin_tree_t *left, bin_tree_t *right,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
114 re_token_type_t type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
115 static bin_tree_t *create_token_tree (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
116 bin_tree_t *left, bin_tree_t *right,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
117 const re_token_t *token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
118 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
119 static void free_token (re_token_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
120 static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
121 static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
122
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
123 /* This table gives an error message for each of the error codes listed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
124 in regex.h. Obviously the order here has to be same as there.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
125 POSIX doesn't require that we do anything for REG_NOERROR,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
126 but why not be nice? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
127
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
128 static const char __re_error_msgid[] =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
129 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
130 #define REG_NOERROR_IDX 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
131 gettext_noop ("Success") /* REG_NOERROR */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
132 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
133 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
134 gettext_noop ("No match") /* REG_NOMATCH */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
135 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
136 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
137 gettext_noop ("Invalid regular expression") /* REG_BADPAT */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
138 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
139 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
140 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
141 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
142 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
143 gettext_noop ("Invalid character class name") /* REG_ECTYPE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
144 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
145 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
146 gettext_noop ("Trailing backslash") /* REG_EESCAPE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
147 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
148 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
149 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
150 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
151 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
152 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
153 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
154 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
155 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
156 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
157 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
158 gettext_noop ("Unmatched \\{") /* REG_EBRACE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
159 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
160 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
161 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
162 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
163 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
164 gettext_noop ("Invalid range end") /* REG_ERANGE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
165 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
166 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
167 gettext_noop ("Memory exhausted") /* REG_ESPACE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
168 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
169 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
170 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
171 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
172 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
173 gettext_noop ("Premature end of regular expression") /* REG_EEND */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
174 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
175 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
176 gettext_noop ("Regular expression too big") /* REG_ESIZE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
177 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
178 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
179 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
180 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
181
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
182 static const size_t __re_error_msgid_idx[] =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
183 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
184 REG_NOERROR_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
185 REG_NOMATCH_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
186 REG_BADPAT_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
187 REG_ECOLLATE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
188 REG_ECTYPE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
189 REG_EESCAPE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
190 REG_ESUBREG_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
191 REG_EBRACK_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
192 REG_EPAREN_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
193 REG_EBRACE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
194 REG_BADBR_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
195 REG_ERANGE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
196 REG_ESPACE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
197 REG_BADRPT_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
198 REG_EEND_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
199 REG_ESIZE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
200 REG_ERPAREN_IDX
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
201 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
202
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
203 /* Entry points for GNU code. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
204
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
205 /* re_compile_pattern is the GNU regular expression compiler: it
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
206 compiles PATTERN (of length LENGTH) and puts the result in BUFP.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
207 Returns 0 if the pattern was valid, otherwise an error string.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
208
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
209 Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
210 are set in BUFP on entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
211
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
212 #ifdef _LIBC
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
213 const char *
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
214 re_compile_pattern (pattern, length, bufp)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
215 const char *pattern;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
216 size_t length;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
217 struct re_pattern_buffer *bufp;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
218 #else /* size_t might promote */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
219 const char *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
220 re_compile_pattern (const char *pattern, size_t length,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
221 struct re_pattern_buffer *bufp)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
222 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
223 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
224 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
225
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
226 /* And GNU code determines whether or not to get register information
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
227 by passing null for the REGS argument to re_match, etc., not by
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
228 setting no_sub, unless RE_NO_SUB is set. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
229 bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
230
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
231 /* Match anchors at newline. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
232 bufp->newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
233
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
234 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
235
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
236 if (!ret)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
237 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
238 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
239 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
240 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
241 weak_alias (__re_compile_pattern, re_compile_pattern)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
242 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
243
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
244 /* Set by 're_set_syntax' to the current regexp syntax to recognize. Can
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
245 also be assigned to arbitrarily: each pattern buffer stores its own
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
246 syntax, so it can be changed between regex compilations. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
247 /* This has no initializer because initialized variables in Emacs
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
248 become read-only after dumping. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
249 reg_syntax_t re_syntax_options;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
250
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
251
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
252 /* Specify the precise syntax of regexps for compilation. This provides
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
253 for compatibility for various utilities which historically have
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
254 different, incompatible syntaxes.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
255
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
256 The argument SYNTAX is a bit mask comprised of the various bits
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
257 defined in regex.h. We return the old syntax. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
258
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
259 reg_syntax_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
260 re_set_syntax (syntax)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
261 reg_syntax_t syntax;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
262 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
263 reg_syntax_t ret = re_syntax_options;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
264
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
265 re_syntax_options = syntax;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
266 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
267 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
268 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
269 weak_alias (__re_set_syntax, re_set_syntax)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
270 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
271
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
272 int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
273 re_compile_fastmap (bufp)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
274 struct re_pattern_buffer *bufp;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
275 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
276 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
277 char *fastmap = bufp->fastmap;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
278
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
279 memset (fastmap, '\0', sizeof (char) * SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
280 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
281 if (dfa->init_state != dfa->init_state_word)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
282 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
283 if (dfa->init_state != dfa->init_state_nl)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
284 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
285 if (dfa->init_state != dfa->init_state_begbuf)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
286 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
287 bufp->fastmap_accurate = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
288 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
289 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
290 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
291 weak_alias (__re_compile_fastmap, re_compile_fastmap)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
292 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
293
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
294 static inline void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
295 __attribute ((always_inline))
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
296 re_set_fastmap (char *fastmap, bool icase, int ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
297 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
298 fastmap[ch] = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
299 if (icase)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
300 fastmap[tolower (ch)] = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
301 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
302
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
303 /* Helper function for re_compile_fastmap.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
304 Compile fastmap for the initial_state INIT_STATE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
305
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
306 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
307 re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
308 char *fastmap)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
309 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
310 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
311 Idx node_cnt;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
312 bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
313 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
314 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
315 Idx node = init_state->nodes.elems[node_cnt];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
316 re_token_type_t type = dfa->nodes[node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
317
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
318 if (type == CHARACTER)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
319 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
320 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
321 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
322 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
323 {
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6104
diff changeset
324 unsigned char buf[MB_LEN_MAX];
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6104
diff changeset
325 unsigned char *p;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
326 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
327 mbstate_t state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
328
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
329 p = buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
330 *p++ = dfa->nodes[node].opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
331 while (++node < dfa->nodes_len
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
332 && dfa->nodes[node].type == CHARACTER
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
333 && dfa->nodes[node].mb_partial)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
334 *p++ = dfa->nodes[node].opr.c;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
335 memset (&state, '\0', sizeof (state));
10998
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10078
diff changeset
336 if (__mbrtowc (&wc, (const char *) buf, p - buf,
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10078
diff changeset
337 &state) == p - buf
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
338 && (__wcrtomb ((char *) buf, towlower (wc), &state)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
339 != (size_t) -1))
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
340 re_set_fastmap (fastmap, false, buf[0]);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
341 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
342 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
343 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
344 else if (type == SIMPLE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
345 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
346 int i, ch;
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
347 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
348 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
349 int j;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
350 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
351 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
352 if (w & ((bitset_word_t) 1 << j))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
353 re_set_fastmap (fastmap, icase, ch);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
354 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
355 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
356 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
357 else if (type == COMPLEX_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
358 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
359 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
360 Idx i;
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
361
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
362 # ifdef _LIBC
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
363 /* See if we have to try all bytes which start multiple collation
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
364 elements.
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
365 e.g. In da_DK, we want to catch 'a' since "aa" is a valid
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
366 collation element, and don't catch 'b' since 'b' is
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
367 the only collation element which starts from 'b' (and
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
368 it is caught by SIMPLE_BRACKET). */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
369 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
370 && (cset->ncoll_syms || cset->nranges))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
371 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
372 const int32_t *table = (const int32_t *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
374 for (i = 0; i < SBC_MAX; ++i)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
375 if (table[i] < 0)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
376 re_set_fastmap (fastmap, icase, i);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
377 }
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
378 # endif /* _LIBC */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
379
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
380 /* See if we have to start the match at all multibyte characters,
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
381 i.e. where we would not find an invalid sequence. This only
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
382 applies to multibyte character sets; for single byte character
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
383 sets, the SIMPLE_BRACKET again suffices. */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
384 if (dfa->mb_cur_max > 1
12351
0b80ccdc9aa4 regex: Fix fastmap for multibyte character ranges.
Paolo Bonzini <bonzini@gnu.org>
parents: 11000
diff changeset
385 && (cset->nchar_classes || cset->non_match || cset->nranges
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
386 # ifdef _LIBC
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
387 || cset->nequiv_classes
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
388 # endif /* _LIBC */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
389 ))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
390 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
391 unsigned char c = 0;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
392 do
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
393 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
394 mbstate_t mbs;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
395 memset (&mbs, 0, sizeof (mbs));
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
396 if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
397 re_set_fastmap (fastmap, false, (int) c);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
398 }
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
399 while (++c != 0);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
400 }
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
401
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
402 else
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
403 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
404 /* ... Else catch all bytes which can start the mbchars. */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
405 for (i = 0; i < cset->nmbchars; ++i)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
406 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
407 char buf[256];
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
408 mbstate_t state;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
409 memset (&state, '\0', sizeof (state));
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
410 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
411 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
412 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
413 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
414 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
415 != (size_t) -1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
416 re_set_fastmap (fastmap, false, *(unsigned char *) buf);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
417 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
418 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
419 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
420 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
421 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
422 else if (type == OP_PERIOD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
423 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
424 || type == OP_UTF8_PERIOD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
425 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
426 || type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
427 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
428 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
429 if (type == END_OF_RE)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
430 bufp->can_be_null = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
431 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
432 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
433 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
434 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
435
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
436 /* Entry point for POSIX code. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
437 /* regcomp takes a regular expression as a string and compiles it.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
438
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
439 PREG is a regex_t *. We do not expect any fields to be initialized,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
440 since POSIX says we shouldn't. Thus, we set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
441
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
442 'buffer' to the compiled pattern;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
443 'used' to the length of the compiled pattern;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
444 'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
445 REG_EXTENDED bit in CFLAGS is set; otherwise, to
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
446 RE_SYNTAX_POSIX_BASIC;
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
447 'newline_anchor' to REG_NEWLINE being set in CFLAGS;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
448 'fastmap' to an allocated space for the fastmap;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
449 'fastmap_accurate' to zero;
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
450 're_nsub' to the number of subexpressions in PATTERN.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
451
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
452 PATTERN is the address of the pattern string.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
453
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
454 CFLAGS is a series of bits which affect compilation.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
455
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
456 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
457 use POSIX basic syntax.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
458
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
459 If REG_NEWLINE is set, then . and [^...] don't match newline.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
460 Also, regexec will try a match beginning after every newline.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
461
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
462 If REG_ICASE is set, then we considers upper- and lowercase
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
463 versions of letters to be equivalent when matching.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
464
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
465 If REG_NOSUB is set, then when PREG is passed to regexec, that
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
466 routine will report only success or failure, and nothing about the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
467 registers.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
468
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
469 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
470 the return codes and their meanings.) */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
471
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
472 int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
473 regcomp (preg, pattern, cflags)
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
474 regex_t *_Restrict_ preg;
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
475 const char *_Restrict_ pattern;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
476 int cflags;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
477 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
478 reg_errcode_t ret;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
479 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
480 : RE_SYNTAX_POSIX_BASIC);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
481
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
482 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
483 preg->allocated = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
484 preg->used = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
485
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
486 /* Try to allocate space for the fastmap. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
487 preg->fastmap = re_malloc (char, SBC_MAX);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
488 if (BE (preg->fastmap == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
489 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
490
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
491 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
492
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
493 /* If REG_NEWLINE is set, newlines are treated differently. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
494 if (cflags & REG_NEWLINE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
495 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
496 syntax &= ~RE_DOT_NEWLINE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
497 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
498 /* It also changes the matching behavior. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
499 preg->newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
500 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
501 else
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
502 preg->newline_anchor = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
503 preg->no_sub = !!(cflags & REG_NOSUB);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
504 preg->translate = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
505
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
506 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
507
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
508 /* POSIX doesn't distinguish between an unmatched open-group and an
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
509 unmatched close-group: both are REG_EPAREN. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
510 if (ret == REG_ERPAREN)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
511 ret = REG_EPAREN;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
512
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
513 /* We have already checked preg->fastmap != NULL. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
514 if (BE (ret == REG_NOERROR, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
515 /* Compute the fastmap now, since regexec cannot modify the pattern
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
516 buffer. This function never fails in this implementation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
517 (void) re_compile_fastmap (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
518 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
519 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
520 /* Some error occurred while compiling the expression. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
521 re_free (preg->fastmap);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
522 preg->fastmap = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
523 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
524
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
525 return (int) ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
526 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
527 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
528 weak_alias (__regcomp, regcomp)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
529 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
530
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
531 /* Returns a message corresponding to an error code, ERRCODE, returned
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
532 from either regcomp or regexec. We don't use PREG here. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
533
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
534 #ifdef _LIBC
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
535 size_t
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
536 regerror (errcode, preg, errbuf, errbuf_size)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
537 int errcode;
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
538 const regex_t *_Restrict_ preg;
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
539 char *_Restrict_ errbuf;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
540 size_t errbuf_size;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
541 #else /* size_t might promote */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
542 size_t
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
543 regerror (int errcode, const regex_t *_Restrict_ preg,
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
544 char *_Restrict_ errbuf, size_t errbuf_size)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
545 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
546 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
547 const char *msg;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
548 size_t msg_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
549
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
550 if (BE (errcode < 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
551 || errcode >= (int) (sizeof (__re_error_msgid_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
552 / sizeof (__re_error_msgid_idx[0])), 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
553 /* Only error codes returned by the rest of the code should be passed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
554 to this routine. If we are given anything else, or if other regex
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
555 code generates an invalid error code, then the program has a bug.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
556 Dump core so we can fix it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
557 abort ();
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
558
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
559 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
560
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
561 msg_size = strlen (msg) + 1; /* Includes the null. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
562
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
563 if (BE (errbuf_size != 0, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
564 {
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
565 size_t cpy_size = msg_size;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
566 if (BE (msg_size > errbuf_size, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
567 {
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
568 cpy_size = errbuf_size - 1;
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
569 errbuf[cpy_size] = '\0';
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
570 }
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
571 memcpy (errbuf, msg, cpy_size);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
572 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
573
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
574 return msg_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
575 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
576 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
577 weak_alias (__regerror, regerror)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
578 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
579
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
580
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
581 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
582 /* This static array is used for the map to single-byte characters when
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
583 UTF-8 is used. Otherwise we would allocate memory just to initialize
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
584 it the same all the time. UTF-8 is the preferred encoding so this is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
585 a worthwhile optimization. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
586 static const bitset_t utf8_sb_map =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
587 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
588 /* Set the first 128 bits. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
589 # if 4 * BITSET_WORD_BITS < ASCII_CHARS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
590 # error "bitset_word_t is narrower than 32 bits"
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
591 # elif 3 * BITSET_WORD_BITS < ASCII_CHARS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
592 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
593 # elif 2 * BITSET_WORD_BITS < ASCII_CHARS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
594 BITSET_WORD_MAX, BITSET_WORD_MAX,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
595 # elif 1 * BITSET_WORD_BITS < ASCII_CHARS
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
596 BITSET_WORD_MAX,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
597 # endif
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
598 (BITSET_WORD_MAX
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
599 >> (SBC_MAX % BITSET_WORD_BITS == 0
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
600 ? 0
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
601 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
602 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
603 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
604
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
605
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
606 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
607 free_dfa_content (re_dfa_t *dfa)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
608 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
609 Idx i, j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
610
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
611 if (dfa->nodes)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
612 for (i = 0; i < dfa->nodes_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
613 free_token (dfa->nodes + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
614 re_free (dfa->nexts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
615 for (i = 0; i < dfa->nodes_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
616 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
617 if (dfa->eclosures != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
618 re_node_set_free (dfa->eclosures + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
619 if (dfa->inveclosures != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
620 re_node_set_free (dfa->inveclosures + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
621 if (dfa->edests != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
622 re_node_set_free (dfa->edests + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
623 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
624 re_free (dfa->edests);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
625 re_free (dfa->eclosures);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
626 re_free (dfa->inveclosures);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
627 re_free (dfa->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
628
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
629 if (dfa->state_table)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
630 for (i = 0; i <= dfa->state_hash_mask; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
631 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
632 struct re_state_table_entry *entry = dfa->state_table + i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
633 for (j = 0; j < entry->num; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
634 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
635 re_dfastate_t *state = entry->array[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
636 free_state (state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
637 }
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
638 re_free (entry->array);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
639 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
640 re_free (dfa->state_table);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
641 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
642 if (dfa->sb_char != utf8_sb_map)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
643 re_free (dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
644 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
645 re_free (dfa->subexp_map);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
646 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
647 re_free (dfa->re_str);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
648 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
649
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
650 re_free (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
651 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
652
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
653
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
654 /* Free dynamically allocated space used by PREG. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
655
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
656 void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
657 regfree (preg)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
658 regex_t *preg;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
659 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
660 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
661 if (BE (dfa != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
662 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
663 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
664 preg->allocated = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
665
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
666 re_free (preg->fastmap);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
667 preg->fastmap = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
668
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
669 re_free (preg->translate);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
670 preg->translate = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
671 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
672 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
673 weak_alias (__regfree, regfree)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
674 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
675
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
676 /* Entry points compatible with 4.2 BSD regex library. We don't define
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
677 them unless specifically requested. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
678
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
679 #if defined _REGEX_RE_COMP || defined _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
680
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
681 /* BSD has one and only one pattern buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
682 static struct re_pattern_buffer re_comp_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
683
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
684 char *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
685 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
686 /* Make these definitions weak in libc, so POSIX programs can redefine
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
687 these names if they don't use our functions, and still use
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
688 regcomp/regexec above without link errors. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
689 weak_function
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
690 # endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
691 re_comp (s)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
692 const char *s;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
693 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
694 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
695 char *fastmap;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
696
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
697 if (!s)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
698 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
699 if (!re_comp_buf.buffer)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
700 return gettext ("No previous regular expression");
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
701 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
702 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
703
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
704 if (re_comp_buf.buffer)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
705 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
706 fastmap = re_comp_buf.fastmap;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
707 re_comp_buf.fastmap = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
708 __regfree (&re_comp_buf);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
709 memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
710 re_comp_buf.fastmap = fastmap;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
711 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
712
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
713 if (re_comp_buf.fastmap == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
714 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
715 re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
716 if (re_comp_buf.fastmap == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
717 return (char *) gettext (__re_error_msgid
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
718 + __re_error_msgid_idx[(int) REG_ESPACE]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
719 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
720
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
721 /* Since 're_exec' always passes NULL for the 'regs' argument, we
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
722 don't need to initialize the pattern buffer fields which affect it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
723
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
724 /* Match anchors at newlines. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
725 re_comp_buf.newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
726
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
727 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
728
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
729 if (!ret)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
730 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
731
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
732 /* Yes, we're discarding 'const' here if !HAVE_LIBINTL. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
733 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
734 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
735
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
736 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
737 libc_freeres_fn (free_mem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
738 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
739 __regfree (&re_comp_buf);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
740 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
741 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
742
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
743 #endif /* _REGEX_RE_COMP */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
744
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
745 /* Internal entry point.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
746 Compile the regular expression PATTERN, whose length is LENGTH.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
747 SYNTAX indicate regular expression's syntax. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
748
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
749 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
750 re_compile_internal (regex_t *preg, const char * pattern, size_t length,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
751 reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
752 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
753 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
754 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
755 re_string_t regexp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
756
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
757 /* Initialize the pattern buffer. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
758 preg->fastmap_accurate = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
759 preg->syntax = syntax;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
760 preg->not_bol = preg->not_eol = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
761 preg->used = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
762 preg->re_nsub = 0;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
763 preg->can_be_null = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
764 preg->regs_allocated = REGS_UNALLOCATED;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
765
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
766 /* Initialize the dfa. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
767 dfa = (re_dfa_t *) preg->buffer;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
768 if (BE (preg->allocated < sizeof (re_dfa_t), 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
769 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
770 /* If zero allocated, but buffer is non-null, try to realloc
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
771 enough space. This loses if buffer's address is bogus, but
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
772 that is the user's responsibility. If ->buffer is NULL this
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
773 is a simple allocation. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
774 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
775 if (dfa == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
776 return REG_ESPACE;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
777 preg->allocated = sizeof (re_dfa_t);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
778 preg->buffer = (unsigned char *) dfa;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
779 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
780 preg->used = sizeof (re_dfa_t);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
781
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
782 err = init_dfa (dfa, length);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
783 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
784 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
785 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
786 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
787 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
788 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
789 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
790 #ifdef DEBUG
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
791 /* Note: length+1 will not overflow since it is checked in init_dfa. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
792 dfa->re_str = re_malloc (char, length + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
793 strncpy (dfa->re_str, pattern, length + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
794 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
795
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
796 __libc_lock_init (dfa->lock);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
797
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
798 err = re_string_construct (&regexp, pattern, length, preg->translate,
10078
f47c913858de Fix violation of <stdbool.h> replacement in regex.
Eric Blake <ebb9@byu.net>
parents: 10075
diff changeset
799 (syntax & RE_ICASE) != 0, dfa);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
800 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
801 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
802 re_compile_internal_free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
803 free_workarea_compile (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
804 re_string_destruct (&regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
805 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
806 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
807 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
808 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
809 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
810
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
811 /* Parse the regular expression, and build a structure tree. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
812 preg->re_nsub = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
813 dfa->str_tree = parse (&regexp, preg, syntax, &err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
814 if (BE (dfa->str_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
815 goto re_compile_internal_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
816
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
817 /* Analyze the tree and create the nfa. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
818 err = analyze (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
819 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
820 goto re_compile_internal_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
821
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
822 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
823 /* If possible, do searching in single byte encoding to speed things up. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
824 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
825 optimize_utf8 (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
826 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
827
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
828 /* Then create the initial state of the dfa. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
829 err = create_initial_state (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
830
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
831 /* Release work areas. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
832 free_workarea_compile (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
833 re_string_destruct (&regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
834
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
835 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
836 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
837 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
838 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
839 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
840 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
841
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
842 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
843 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
844
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
845 /* Initialize DFA. We use the length of the regular expression PAT_LEN
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
846 as the initial length of some arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
847
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
848 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
849 init_dfa (re_dfa_t *dfa, size_t pat_len)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
850 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
851 __re_size_t table_size;
12567
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
852 #ifndef _LIBC
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
853 char *codeset_name;
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
854 #endif
6733
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
855 #ifdef RE_ENABLE_I18N
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
856 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
857 #else
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
858 size_t max_i18n_object_size = 0;
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
859 #endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
860 size_t max_object_size =
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
861 MAX (sizeof (struct re_state_table_entry),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
862 MAX (sizeof (re_token_t),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
863 MAX (sizeof (re_node_set),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
864 MAX (sizeof (regmatch_t),
6733
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
865 max_i18n_object_size))));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
866
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
867 memset (dfa, '\0', sizeof (re_dfa_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
868
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
869 /* Force allocation of str_tree_storage the first time. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
870 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
871
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
872 /* Avoid overflows. The extra "/ 2" is for the table_size doubling
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
873 calculation below, and for similar doubling calculations
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
874 elsewhere. And it's <= rather than <, because some of the
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
875 doubling calculations add 1 afterwards. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
876 if (BE (SIZE_MAX / max_object_size / 2 <= pat_len, 0))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
877 return REG_ESPACE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
878
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
879 dfa->nodes_alloc = pat_len + 1;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
880 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
881
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
882 /* table_size = 2 ^ ceil(log pat_len) */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
883 for (table_size = 1; ; table_size <<= 1)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
884 if (table_size > pat_len)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
885 break;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
886
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
887 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
888 dfa->state_hash_mask = table_size - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
889
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
890 dfa->mb_cur_max = MB_CUR_MAX;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
891 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
892 if (dfa->mb_cur_max == 6
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
893 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
894 dfa->is_utf8 = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
895 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
896 != 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
897 #else
12567
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
898 codeset_name = nl_langinfo (CODESET);
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
899 if (strcasecmp (codeset_name, "UTF-8") == 0
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
900 || strcasecmp (codeset_name, "UTF8") == 0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
901 dfa->is_utf8 = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
902
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
903 /* We check exhaustively in the loop below if this charset is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
904 superset of ASCII. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
905 dfa->map_notascii = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
906 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
907
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
908 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
909 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
910 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
911 if (dfa->is_utf8)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
912 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
913 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
914 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
915 int i, j, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
916
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
917 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
918 if (BE (dfa->sb_char == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
919 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
920
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
921 /* Set the bits corresponding to single byte chars. */
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
922 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
923 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
924 {
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
925 wint_t wch = __btowc (ch);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
926 if (wch != WEOF)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
927 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
928 # ifndef _LIBC
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
929 if (isascii (ch) && wch != ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
930 dfa->map_notascii = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
931 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
932 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
933 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
934 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
935 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
936
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
937 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
938 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
939 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
940 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
941
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
942 /* Initialize WORD_CHAR table, which indicate which character is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
943 "word". In this case "word" means that it is the word construction
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
944 character used by some operators like "\<", "\>", etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
945
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
946 static void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
947 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
948 init_word_char (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
949 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
950 int i, j, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
951 dfa->word_ops_used = 1;
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
952 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
953 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
954 if (isalnum (ch) || ch == '_')
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
955 dfa->word_char[i] |= (bitset_word_t) 1 << j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
956 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
957
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
958 /* Free the work area which are only used while compiling. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
959
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
960 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
961 free_workarea_compile (regex_t *preg)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
962 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
963 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
964 bin_tree_storage_t *storage, *next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
965 for (storage = dfa->str_tree_storage; storage; storage = next)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
966 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
967 next = storage->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
968 re_free (storage);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
969 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
970 dfa->str_tree_storage = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
971 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
972 dfa->str_tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
973 re_free (dfa->org_indices);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
974 dfa->org_indices = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
975 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
976
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
977 /* Create initial states for all contexts. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
978
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
979 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
980 create_initial_state (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
981 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
982 Idx first, i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
983 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
984 re_node_set init_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
985
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
986 /* Initial states have the epsilon closure of the node which is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
987 the first node of the regular expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
988 first = dfa->str_tree->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
989 dfa->init_node = first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
990 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
991 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
992 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
993
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
994 /* The back-references which are in initial states can epsilon transit,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
995 since in this case all of the subexpressions can be null.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
996 Then we add epsilon closures of the nodes which are the next nodes of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
997 the back-references. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
998 if (dfa->nbackref > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
999 for (i = 0; i < init_nodes.nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1000 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1001 Idx node_idx = init_nodes.elems[i];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1002 re_token_type_t type = dfa->nodes[node_idx].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1003
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1004 Idx clexp_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1005 if (type != OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1006 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1007 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1008 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1009 re_token_t *clexp_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1010 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1011 if (clexp_node->type == OP_CLOSE_SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1012 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1013 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1014 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1015 if (clexp_idx == init_nodes.nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1016 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1017
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1018 if (type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1019 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1020 Idx dest_idx = dfa->edests[node_idx].elems[0];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1021 if (!re_node_set_contains (&init_nodes, dest_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1022 {
12847
64dad3a0ba71 regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents: 12831
diff changeset
1023 reg_errcode_t merge_err
64dad3a0ba71 regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents: 12831
diff changeset
1024 = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
64dad3a0ba71 regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents: 12831
diff changeset
1025 if (merge_err != REG_NOERROR)
64dad3a0ba71 regcomp.c: avoid a new -Wshadow warning
Jim Meyering <meyering@redhat.com>
parents: 12831
diff changeset
1026 return merge_err;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1027 i = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1028 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1029 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1030 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1031
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1032 /* It must be the first time to invoke acquire_state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1033 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1034 /* We don't check ERR here, since the initial state must not be NULL. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1035 if (BE (dfa->init_state == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1036 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1037 if (dfa->init_state->has_constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1038 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1039 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1040 CONTEXT_WORD);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1041 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1042 CONTEXT_NEWLINE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1043 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1044 &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1045 CONTEXT_NEWLINE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1046 | CONTEXT_BEGBUF);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1047 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1048 || dfa->init_state_begbuf == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1049 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1050 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1051 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1052 dfa->init_state_word = dfa->init_state_nl
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1053 = dfa->init_state_begbuf = dfa->init_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1054
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1055 re_node_set_free (&init_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1056 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1057 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1058
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1059 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1060 /* If it is possible to do searching in single byte encoding instead of UTF-8
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1061 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1062 DFA nodes where needed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1063
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1064 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1065 optimize_utf8 (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1066 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1067 Idx node;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1068 int i;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1069 bool mb_chars = false;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1070 bool has_period = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1071
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1072 for (node = 0; node < dfa->nodes_len; ++node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1073 switch (dfa->nodes[node].type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1074 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1075 case CHARACTER:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1076 if (dfa->nodes[node].opr.c >= ASCII_CHARS)
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1077 mb_chars = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1078 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1079 case ANCHOR:
9494
7cd817e07a16 Fix a 4-year-old used-uninitialized bug in regcomp.c.
Jim Meyering <meyering@redhat.com>
parents: 8153
diff changeset
1080 switch (dfa->nodes[node].opr.ctx_type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1081 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1082 case LINE_FIRST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1083 case LINE_LAST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1084 case BUF_FIRST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1085 case BUF_LAST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1086 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1087 default:
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1088 /* Word anchors etc. cannot be handled. It's okay to test
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1089 opr.ctx_type since constraints (for all DFA nodes) are
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1090 created by ORing one or more opr.ctx_type values. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1091 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1092 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1093 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1094 case OP_PERIOD:
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1095 has_period = true;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1096 break;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1097 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1098 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1099 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1100 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1101 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1102 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1103 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1104 case COMPLEX_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1105 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1106 case SIMPLE_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1107 /* Just double check. */
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1108 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1109 int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1110 ? 0
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1111 : BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1112 for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1113 {
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1114 if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1115 return;
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1116 rshift = 0;
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1117 }
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1118 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1119 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1120 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1121 abort ();
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1122 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1123
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1124 if (mb_chars || has_period)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1125 for (node = 0; node < dfa->nodes_len; ++node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1126 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1127 if (dfa->nodes[node].type == CHARACTER
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1128 && dfa->nodes[node].opr.c >= ASCII_CHARS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1129 dfa->nodes[node].mb_partial = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1130 else if (dfa->nodes[node].type == OP_PERIOD)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1131 dfa->nodes[node].type = OP_UTF8_PERIOD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1132 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1133
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1134 /* The search can be in single byte locale. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1135 dfa->mb_cur_max = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1136 dfa->is_utf8 = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1137 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1138 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1139 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1140
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1141 /* Analyze the structure tree, and calculate "first", "next", "edest",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1142 "eclosure", and "inveclosure". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1143
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1144 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1145 analyze (regex_t *preg)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1146 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1147 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1148 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1149
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1150 /* Allocate arrays. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1151 dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1152 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1153 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1154 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1155 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1156 || dfa->eclosures == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1157 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1158
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1159 dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1160 if (dfa->subexp_map != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1161 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1162 Idx i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1163 for (i = 0; i < preg->re_nsub; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1164 dfa->subexp_map[i] = i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1165 preorder (dfa->str_tree, optimize_subexps, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1166 for (i = 0; i < preg->re_nsub; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1167 if (dfa->subexp_map[i] != i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1168 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1169 if (i == preg->re_nsub)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1170 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1171 free (dfa->subexp_map);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1172 dfa->subexp_map = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1173 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1174 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1175
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1176 ret = postorder (dfa->str_tree, lower_subexps, preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1177 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1178 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1179 ret = postorder (dfa->str_tree, calc_first, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1180 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1181 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1182 preorder (dfa->str_tree, calc_next, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1183 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1184 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1185 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1186 ret = calc_eclosure (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1187 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1188 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1189
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1190 /* We only need this during the prune_impossible_nodes pass in regexec.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1191 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1192 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1193 || dfa->nbackref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1194 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1195 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1196 if (BE (dfa->inveclosures == NULL, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1197 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1198 ret = calc_inveclosure (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1199 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1200
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1201 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1202 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1203
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1204 /* Our parse trees are very unbalanced, so we cannot use a stack to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1205 implement parse tree visits. Instead, we use parent pointers and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1206 some hairy code in these two functions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1207 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1208 postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1209 void *extra)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1210 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1211 bin_tree_t *node, *prev;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1212
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1213 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1214 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1215 /* Descend down the tree, preferably to the left (or to the right
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1216 if that's the only child). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1217 while (node->left || node->right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1218 if (node->left)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1219 node = node->left;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1220 else
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1221 node = node->right;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1222
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1223 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1224 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1225 reg_errcode_t err = fn (extra, node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1226 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1227 return err;
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1228 if (node->parent == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1229 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1230 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1231 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1232 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1233 /* Go up while we have a node that is reached from the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1234 while (node->right == prev || node->right == NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1235 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1236 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1237 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1238
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1239 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1240 preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1241 void *extra)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1242 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1243 bin_tree_t *node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1244
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1245 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1246 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1247 reg_errcode_t err = fn (extra, node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1248 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1249 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1250
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1251 /* Go to the left node, or up and to the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1252 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1253 node = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1254 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1255 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1256 bin_tree_t *prev = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1257 while (node->right == prev || node->right == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1258 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1259 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1260 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1261 if (!node)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1262 return REG_NOERROR;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1263 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1264 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1265 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1266 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1267 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1268
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1269 /* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1270 re_search_internal to map the inner one's opr.idx to this one's. Adjust
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1271 backreferences as well. Requires a preorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1272 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1273 optimize_subexps (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1274 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1275 re_dfa_t *dfa = (re_dfa_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1276
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1277 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1278 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1279 int idx = node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1280 node->token.opr.idx = dfa->subexp_map[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1281 dfa->used_bkref_map |= 1 << node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1282 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1283
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1284 else if (node->token.type == SUBEXP
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1285 && node->left && node->left->token.type == SUBEXP)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1286 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1287 Idx other_idx = node->left->token.opr.idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1288
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1289 node->left = node->left->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1290 if (node->left)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1291 node->left->parent = node;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1292
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1293 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1294 if (other_idx < BITSET_WORD_BITS)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1295 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1296 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1297
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1298 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1299 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1300
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1301 /* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1302 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1303 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1304 lower_subexps (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1305 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1306 regex_t *preg = (regex_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1307 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1308
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1309 if (node->left && node->left->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1310 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1311 node->left = lower_subexp (&err, preg, node->left);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1312 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1313 node->left->parent = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1314 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1315 if (node->right && node->right->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1316 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1317 node->right = lower_subexp (&err, preg, node->right);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1318 if (node->right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1319 node->right->parent = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1320 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1321
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1322 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1323 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1324
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1325 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1326 lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1327 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1328 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1329 bin_tree_t *body = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1330 bin_tree_t *op, *cls, *tree1, *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1331
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1332 if (preg->no_sub
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1333 /* We do not optimize empty subexpressions, because otherwise we may
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1334 have bad CONCAT nodes with NULL children. This is obviously not
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1335 very common, so we do not lose much. An example that triggers
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1336 this case is the sed "script" /\(\)/x. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1337 && node->left != NULL
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1338 && (node->token.opr.idx >= BITSET_WORD_BITS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1339 || !(dfa->used_bkref_map
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1340 & ((bitset_word_t) 1 << node->token.opr.idx))))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1341 return node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1342
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1343 /* Convert the SUBEXP node to the concatenation of an
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1344 OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1345 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1346 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1347 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1348 tree = create_tree (dfa, op, tree1, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1349 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1350 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1351 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1352 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1353 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1354
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1355 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1356 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1357 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1358 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1359
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1360 /* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1361 nodes. Requires a postorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1362 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1363 calc_first (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1364 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1365 re_dfa_t *dfa = (re_dfa_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1366 if (node->token.type == CONCAT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1367 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1368 node->first = node->left->first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1369 node->node_idx = node->left->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1370 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1371 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1372 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1373 node->first = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1374 node->node_idx = re_dfa_add_node (dfa, node->token);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1375 if (BE (node->node_idx == REG_MISSING, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1376 return REG_ESPACE;
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1377 if (node->token.type == ANCHOR)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1378 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1379 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1380 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1381 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1382
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1383 /* Pass 2: compute NEXT on the tree. Preorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1384 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1385 calc_next (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1386 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1387 switch (node->token.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1388 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1389 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1390 node->left->next = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1391 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1392 case CONCAT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1393 node->left->next = node->right->first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1394 node->right->next = node->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1395 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1396 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1397 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1398 node->left->next = node->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1399 if (node->right)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
1400 node->right->next = node->next;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1401 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1402 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1403 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1404 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1405
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1406 /* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1407 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1408 link_nfa_nodes (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1409 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1410 re_dfa_t *dfa = (re_dfa_t *) extra;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1411 Idx idx = node->node_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1412 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1413
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1414 switch (node->token.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1415 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1416 case CONCAT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1417 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1418
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1419 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1420 assert (node->next == NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1421 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1422
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1423 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1424 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1425 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1426 Idx left, right;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1427 dfa->has_plural_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1428 if (node->left != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1429 left = node->left->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1430 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1431 left = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1432 if (node->right != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1433 right = node->right->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1434 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1435 right = node->next->node_idx;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1436 assert (REG_VALID_INDEX (left));
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1437 assert (REG_VALID_INDEX (right));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1438 err = re_node_set_init_2 (dfa->edests + idx, left, right);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1439 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1440 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1441
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1442 case ANCHOR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1443 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1444 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1445 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1446 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1447
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1448 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1449 dfa->nexts[idx] = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1450 if (node->token.type == OP_BACK_REF)
12829
971957a253f8 regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents: 12572
diff changeset
1451 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1452 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1453
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1454 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1455 assert (!IS_EPSILON_NODE (node->token.type));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1456 dfa->nexts[idx] = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1457 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1458 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1459
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1460 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1461 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1462
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1463 /* Duplicate the epsilon closure of the node ROOT_NODE.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1464 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1465 to their own constraint. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1466
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1467 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1468 internal_function
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1469 duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1470 Idx root_node, unsigned int init_constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1471 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1472 Idx org_node, clone_node;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1473 bool ok;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1474 unsigned int constraint = init_constraint;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1475 for (org_node = top_org_node, clone_node = top_clone_node;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1476 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1477 Idx org_dest, clone_dest;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1478 if (dfa->nodes[org_node].type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1479 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1480 /* If the back reference epsilon-transit, its destination must
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1481 also have the constraint. Then duplicate the epsilon closure
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1482 of the destination of the back reference, and store it in
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1483 edests of the back reference. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1484 org_dest = dfa->nexts[org_node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1485 re_node_set_empty (dfa->edests + clone_node);
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1486 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1487 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1488 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1489 dfa->nexts[clone_node] = dfa->nexts[org_node];
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1490 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1491 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1492 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1493 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1494 else if (dfa->edests[org_node].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1495 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1496 /* In case of the node can't epsilon-transit, don't duplicate the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1497 destination and store the original destination as the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1498 destination of the node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1499 dfa->nexts[clone_node] = dfa->nexts[org_node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1500 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1501 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1502 else if (dfa->edests[org_node].nelem == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1503 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1504 /* In case of the node can epsilon-transit, and it has only one
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1505 destination. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1506 org_dest = dfa->edests[org_node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1507 re_node_set_empty (dfa->edests + clone_node);
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1508 /* If the node is root_node itself, it means the epsilon closure
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1509 has a loop. Then tie it to the destination of the root_node. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1510 if (org_node == root_node && clone_node != org_node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1511 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1512 ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1513 if (BE (! ok, 0))
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1514 return REG_ESPACE;
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1515 break;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1516 }
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1517 /* In case the node has another constraint, append it. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1518 constraint |= dfa->nodes[org_node].constraint;
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1519 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1520 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1521 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1522 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1523 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1524 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1525 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1526 else /* dfa->edests[org_node].nelem == 2 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1527 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1528 /* In case of the node can epsilon-transit, and it has two
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1529 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1530 org_dest = dfa->edests[org_node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1531 re_node_set_empty (dfa->edests + clone_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1532 /* Search for a duplicated node which satisfies the constraint. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1533 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1534 if (clone_dest == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1535 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1536 /* There is no such duplicated node, create a new one. */
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1537 reg_errcode_t err;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1538 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1539 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1540 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1541 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1542 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1543 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1544 err = duplicate_node_closure (dfa, org_dest, clone_dest,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1545 root_node, constraint);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1546 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1547 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1548 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1549 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1550 {
12569
51ea446bf1f8 regcomp: fix typo in comment
Jim Meyering <meyering@redhat.com>
parents: 12568
diff changeset
1551 /* There is a duplicated node which satisfies the constraint,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1552 use it to avoid infinite loop. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1553 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1554 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1555 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1556 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1557
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1558 org_dest = dfa->edests[org_node].elems[1];
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1559 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1560 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1561 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1562 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1563 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1564 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1565 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1566 org_node = org_dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1567 clone_node = clone_dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1568 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1569 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1570 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1571
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1572 /* Search for a node which is duplicated from the node ORG_NODE, and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1573 satisfies the constraint CONSTRAINT. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1574
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1575 static Idx
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
1576 search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1577 unsigned int constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1578 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1579 Idx idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1580 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1581 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1582 if (org_node == dfa->org_indices[idx]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1583 && constraint == dfa->nodes[idx].constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1584 return idx; /* Found. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1585 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1586 return REG_MISSING; /* Not found. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1587 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1588
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1589 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1590 Return the index of the new node, or REG_MISSING if insufficient storage is
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1591 available. */
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1592
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1593 static Idx
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1594 duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1595 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1596 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1597 if (BE (dup_idx != REG_MISSING, 1))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1598 {
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1599 dfa->nodes[dup_idx].constraint = constraint;
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1600 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1601 dfa->nodes[dup_idx].duplicated = 1;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1602
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1603 /* Store the index of the original node. */
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1604 dfa->org_indices[dup_idx] = org_idx;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1605 }
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1606 return dup_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1607 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1608
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1609 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1610 calc_inveclosure (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1611 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1612 Idx src, idx;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1613 bool ok;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1614 for (idx = 0; idx < dfa->nodes_len; ++idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1615 re_node_set_init_empty (dfa->inveclosures + idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1616
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1617 for (src = 0; src < dfa->nodes_len; ++src)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1618 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1619 Idx *elems = dfa->eclosures[src].elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1620 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1621 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1622 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1623 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1624 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1625 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1626 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1627
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1628 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1629 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1630
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1631 /* Calculate "eclosure" for all the node in DFA. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1632
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1633 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1634 calc_eclosure (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1635 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1636 Idx node_idx;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1637 bool incomplete;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1638 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1639 assert (dfa->nodes_len > 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1640 #endif
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1641 incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1642 /* For each nodes, calculate epsilon closure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1643 for (node_idx = 0; ; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1644 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1645 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1646 re_node_set eclosure_elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1647 if (node_idx == dfa->nodes_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1648 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1649 if (!incomplete)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1650 break;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1651 incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1652 node_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1653 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1654
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1655 #ifdef DEBUG
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1656 assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1657 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1658
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1659 /* If we have already calculated, skip it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1660 if (dfa->eclosures[node_idx].nelem != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1661 continue;
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1662 /* Calculate epsilon closure of 'node_idx'. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1663 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1664 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1665 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1666
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1667 if (dfa->eclosures[node_idx].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1668 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1669 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1670 re_node_set_free (&eclosure_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1671 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1672 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1673 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1674 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1675
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1676 /* Calculate epsilon closure of NODE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1677
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1678 static reg_errcode_t
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1679 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1680 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1681 reg_errcode_t err;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1682 Idx i;
12831
00cfc5186819 regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents: 12830
diff changeset
1683 re_node_set eclosure;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1684 bool ok;
12831
00cfc5186819 regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents: 12830
diff changeset
1685 bool incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1686 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1687 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1688 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1689
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1690 /* This indicates that we are calculating this node now.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1691 We reference this value to avoid infinite loop. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1692 dfa->eclosures[node].nelem = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1693
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1694 /* If the current node has constraints, duplicate all nodes
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1695 since they must inherit the constraints. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1696 if (dfa->nodes[node].constraint
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1697 && dfa->edests[node].nelem
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1698 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1699 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1700 err = duplicate_node_closure (dfa, node, node, node,
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1701 dfa->nodes[node].constraint);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1702 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1703 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1704 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1705
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1706 /* Expand each epsilon destination nodes. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1707 if (IS_EPSILON_NODE(dfa->nodes[node].type))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1708 for (i = 0; i < dfa->edests[node].nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1709 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1710 re_node_set eclosure_elem;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1711 Idx edest = dfa->edests[node].elems[i];
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1712 /* If calculating the epsilon closure of 'edest' is in progress,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1713 return intermediate result. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1714 if (dfa->eclosures[edest].nelem == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1715 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1716 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1717 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1718 }
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1719 /* If we haven't calculated the epsilon closure of 'edest' yet,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1720 calculate now. Otherwise use calculated epsilon closure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1721 if (dfa->eclosures[edest].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1722 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1723 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1724 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1725 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1726 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1727 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1728 eclosure_elem = dfa->eclosures[edest];
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1729 /* Merge the epsilon closure of 'edest'. */
12829
971957a253f8 regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents: 12572
diff changeset
1730 err = re_node_set_merge (&eclosure, &eclosure_elem);
971957a253f8 regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents: 12572
diff changeset
1731 if (BE (err != REG_NOERROR, 0))
971957a253f8 regcomp.c: do not ignore internal return values
Jim Meyering <meyering@redhat.com>
parents: 12572
diff changeset
1732 return err;
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1733 /* If the epsilon closure of 'edest' is incomplete,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1734 the epsilon closure of this node is also incomplete. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1735 if (dfa->eclosures[edest].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1736 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1737 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1738 re_node_set_free (&eclosure_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1739 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1740 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1741
12831
00cfc5186819 regcomp.c: spelling and merge-artifact from glibc
Jim Meyering <meyering@redhat.com>
parents: 12830
diff changeset
1742 /* An epsilon closure includes itself. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1743 ok = re_node_set_insert (&eclosure, node);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1744 if (BE (! ok, 0))
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1745 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1746 if (incomplete && !root)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1747 dfa->eclosures[node].nelem = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1748 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1749 dfa->eclosures[node] = eclosure;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1750 *new_set = eclosure;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1751 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1752 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1753
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1754 /* Functions for token which are used in the parser. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1755
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1756 /* Fetch a token from INPUT.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1757 We must not use this function inside bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1758
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1759 static void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1760 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1761 fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1762 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1763 re_string_skip_bytes (input, peek_token (result, input, syntax));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1764 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1765
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1766 /* Peek a token from INPUT, and return the length of the token.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1767 We must not use this function inside bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1768
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1769 static int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1770 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1771 peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1772 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1773 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1774
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1775 if (re_string_eoi (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1776 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1777 token->type = END_OF_RE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1778 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1779 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1780
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1781 c = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1782 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1783
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1784 token->word_char = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1785 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1786 token->mb_partial = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1787 if (input->mb_cur_max > 1 &&
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1788 !re_string_first_byte (input, re_string_cur_idx (input)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1789 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1790 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1791 token->mb_partial = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1792 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1793 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1794 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1795 if (c == '\\')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1796 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1797 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1798 if (re_string_cur_idx (input) + 1 >= re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1799 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1800 token->type = BACK_SLASH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1801 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1802 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1803
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1804 c2 = re_string_peek_byte_case (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1805 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1806 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1807 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1808 if (input->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1809 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1810 wint_t wc = re_string_wchar_at (input,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1811 re_string_cur_idx (input) + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1812 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1813 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1814 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1815 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1816 token->word_char = IS_WORD_CHAR (c2) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1817
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1818 switch (c2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1819 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1820 case '|':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1821 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1822 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1823 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1824 case '1': case '2': case '3': case '4': case '5':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1825 case '6': case '7': case '8': case '9':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1826 if (!(syntax & RE_NO_BK_REFS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1827 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1828 token->type = OP_BACK_REF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1829 token->opr.idx = c2 - '1';
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1830 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1831 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1832 case '<':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1833 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1834 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1835 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1836 token->opr.ctx_type = WORD_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1837 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1838 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1839 case '>':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1840 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1841 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1842 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1843 token->opr.ctx_type = WORD_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1844 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1845 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1846 case 'b':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1847 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1848 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1849 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1850 token->opr.ctx_type = WORD_DELIM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1851 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1852 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1853 case 'B':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1854 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1855 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1856 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1857 token->opr.ctx_type = NOT_WORD_DELIM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1858 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1859 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1860 case 'w':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1861 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1862 token->type = OP_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1863 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1864 case 'W':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1865 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1866 token->type = OP_NOTWORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1867 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1868 case 's':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1869 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1870 token->type = OP_SPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1871 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1872 case 'S':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1873 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1874 token->type = OP_NOTSPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1875 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1876 case '`':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1877 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1878 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1879 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1880 token->opr.ctx_type = BUF_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1881 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1882 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1883 case '\'':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1884 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1885 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1886 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1887 token->opr.ctx_type = BUF_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1888 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1889 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1890 case '(':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1891 if (!(syntax & RE_NO_BK_PARENS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1892 token->type = OP_OPEN_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1893 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1894 case ')':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1895 if (!(syntax & RE_NO_BK_PARENS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1896 token->type = OP_CLOSE_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1897 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1898 case '+':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1899 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1900 token->type = OP_DUP_PLUS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1901 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1902 case '?':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1903 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1904 token->type = OP_DUP_QUESTION;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1905 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1906 case '{':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1907 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1908 token->type = OP_OPEN_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1909 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1910 case '}':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1911 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1912 token->type = OP_CLOSE_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1913 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1914 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1915 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1916 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1917 return 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1918 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1919
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1920 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1921 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1922 if (input->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1923 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1924 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1925 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1926 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1927 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1928 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1929 token->word_char = IS_WORD_CHAR (token->opr.c);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1930
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1931 switch (c)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1932 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1933 case '\n':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1934 if (syntax & RE_NEWLINE_ALT)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1935 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1936 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1937 case '|':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1938 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1939 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1940 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1941 case '*':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1942 token->type = OP_DUP_ASTERISK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1943 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1944 case '+':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1945 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1946 token->type = OP_DUP_PLUS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1947 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1948 case '?':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1949 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1950 token->type = OP_DUP_QUESTION;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1951 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1952 case '{':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1953 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1954 token->type = OP_OPEN_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1955 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1956 case '}':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1957 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1958 token->type = OP_CLOSE_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1959 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1960 case '(':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1961 if (syntax & RE_NO_BK_PARENS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1962 token->type = OP_OPEN_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1963 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1964 case ')':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1965 if (syntax & RE_NO_BK_PARENS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1966 token->type = OP_CLOSE_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1967 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1968 case '[':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1969 token->type = OP_OPEN_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1970 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1971 case '.':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1972 token->type = OP_PERIOD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1973 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1974 case '^':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1975 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1976 re_string_cur_idx (input) != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1977 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1978 char prev = re_string_peek_byte (input, -1);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1979 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1980 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1981 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1982 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1983 token->opr.ctx_type = LINE_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1984 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1985 case '$':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1986 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1987 re_string_cur_idx (input) + 1 != re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1988 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1989 re_token_t next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1990 re_string_skip_bytes (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1991 peek_token (&next, input, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1992 re_string_skip_bytes (input, -1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1993 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1994 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1995 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1996 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1997 token->opr.ctx_type = LINE_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1998 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1999 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2000 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2001 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2002 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2003 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2004
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2005 /* Peek a token from INPUT, and return the length of the token.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2006 We must not use this function out of bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2007
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2008 static int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2009 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2010 peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2011 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2012 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2013 if (re_string_eoi (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2014 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2015 token->type = END_OF_RE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2016 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2017 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2018 c = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2019 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2020
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2021 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2022 if (input->mb_cur_max > 1 &&
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2023 !re_string_first_byte (input, re_string_cur_idx (input)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2024 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2025 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2026 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2027 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2028 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2029
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2030 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2031 && re_string_cur_idx (input) + 1 < re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2032 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2033 /* In this case, '\' escape a character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2034 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2035 re_string_skip_bytes (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2036 c2 = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2037 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2038 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2039 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2040 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2041 if (c == '[') /* '[' is a special char in a bracket exps. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2042 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2043 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2044 int token_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2045 if (re_string_cur_idx (input) + 1 < re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2046 c2 = re_string_peek_byte (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2047 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2048 c2 = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2049 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2050 token_len = 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2051 switch (c2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2052 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2053 case '.':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2054 token->type = OP_OPEN_COLL_ELEM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2055 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2056 case '=':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2057 token->type = OP_OPEN_EQUIV_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2058 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2059 case ':':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2060 if (syntax & RE_CHAR_CLASSES)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2061 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2062 token->type = OP_OPEN_CHAR_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2063 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2064 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2065 /* else fall through. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2066 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2067 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2068 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2069 token_len = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2070 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2071 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2072 return token_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2073 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2074 switch (c)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2075 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2076 case '-':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2077 token->type = OP_CHARSET_RANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2078 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2079 case ']':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2080 token->type = OP_CLOSE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2081 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2082 case '^':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2083 token->type = OP_NON_MATCH_LIST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2084 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2085 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2086 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2087 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2088 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2089 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2090
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2091 /* Functions for parser. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2092
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2093 /* Entry point of the parser.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2094 Parse the regular expression REGEXP and return the structure tree.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2095 If an error is occured, ERR is set by error code, and return NULL.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2096 This function build the following tree, from regular expression <reg_exp>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2097 CAT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2098 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2099 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2100 <reg_exp> EOR
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2101
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2102 CAT means concatenation.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2103 EOR means end of regular expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2104
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2105 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2106 parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2107 reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2108 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2109 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2110 bin_tree_t *tree, *eor, *root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2111 re_token_t current_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2112 dfa->syntax = syntax;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2113 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2114 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2115 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2116 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2117 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2118 if (tree != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2119 root = create_tree (dfa, tree, eor, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2120 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2121 root = eor;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2122 if (BE (eor == NULL || root == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2123 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2124 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2125 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2126 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2127 return root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2128 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2129
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2130 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2131 <branch1>|<branch2>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2132 ALT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2133 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2134 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2135 <branch1> <branch2>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2136
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
2137 ALT means alternative, which represents the operator '|'. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2138
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2139 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2140 parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2141 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2142 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2143 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2144 bin_tree_t *tree, *branch = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2145 tree = parse_branch (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2146 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2147 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2148
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2149 while (token->type == OP_ALT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2150 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2151 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2152 if (token->type != OP_ALT && token->type != END_OF_RE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2153 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2154 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2155 branch = parse_branch (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2156 if (BE (*err != REG_NOERROR && branch == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2157 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2158 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2159 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2160 branch = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2161 tree = create_tree (dfa, tree, branch, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2162 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2163 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2164 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2165 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2166 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2167 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2168 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2169 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2170
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2171 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2172 <exp1><exp2>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2173 CAT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2174 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2175 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2176 <exp1> <exp2>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2177
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2178 CAT means concatenation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2179
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2180 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2181 parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2182 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2183 {
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2184 bin_tree_t *tree, *expr;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2185 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2186 tree = parse_expression (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2187 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2188 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2189
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2190 while (token->type != OP_ALT && token->type != END_OF_RE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2191 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2192 {
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2193 expr = parse_expression (regexp, preg, token, syntax, nest, err);
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2194 if (BE (*err != REG_NOERROR && expr == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2195 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2196 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2197 }
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2198 if (tree != NULL && expr != NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2199 {
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2200 tree = create_tree (dfa, tree, expr, CONCAT);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2201 if (tree == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2202 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2203 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2204 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2205 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2206 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2207 else if (tree == NULL)
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2208 tree = expr;
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2209 /* Otherwise expr == NULL, we don't need to create new tree. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2210 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2211 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2212 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2213
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2214 /* This function build the following tree, from regular expression a*:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2215 *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2216 |
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2217 a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2218 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2219
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2220 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2221 parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2222 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2223 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2224 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2225 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2226 switch (token->type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2227 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2228 case CHARACTER:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2229 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2230 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2231 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2232 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2233 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2234 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2235 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2236 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2237 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2238 while (!re_string_eoi (regexp)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2239 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2240 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2241 bin_tree_t *mbc_remain;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2242 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2243 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2244 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2245 if (BE (mbc_remain == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2246 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2247 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2248 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2249 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2250 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2251 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2252 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2253 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2254 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2255 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2256 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2257 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2258 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2259 case OP_OPEN_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2260 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2261 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2262 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2263 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2264 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2265 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2266 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2267 *err = REG_ESUBREG;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2268 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2269 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2270 dfa->used_bkref_map |= 1 << token->opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2271 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2272 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2273 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2274 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2275 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2276 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2277 ++dfa->nbackref;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2278 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2279 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2280 case OP_OPEN_DUP_NUM:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2281 if (syntax & RE_CONTEXT_INVALID_DUP)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2282 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2283 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2284 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2285 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2286 /* FALLTHROUGH */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2287 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2288 case OP_DUP_PLUS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2289 case OP_DUP_QUESTION:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2290 if (syntax & RE_CONTEXT_INVALID_OPS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2291 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2292 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2293 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2294 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2295 else if (syntax & RE_CONTEXT_INDEP_OPS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2296 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2297 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2298 return parse_expression (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2299 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2300 /* else fall through */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2301 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2302 if ((token->type == OP_CLOSE_SUBEXP) &&
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2303 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2304 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2305 *err = REG_ERPAREN;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2306 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2307 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2308 /* else fall through */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2309 case OP_CLOSE_DUP_NUM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2310 /* We treat it as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2311
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2312 /* Then we can these characters as normal characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2313 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2314 /* mb_partial and word_char bits should be initialized already
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2315 by peek_token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2316 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2317 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2318 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2319 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2320 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2321 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2322 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2323 case ANCHOR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2324 if ((token->opr.ctx_type
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2325 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2326 && dfa->word_ops_used == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2327 init_word_char (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2328 if (token->opr.ctx_type == WORD_DELIM
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2329 || token->opr.ctx_type == NOT_WORD_DELIM)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2330 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2331 bin_tree_t *tree_first, *tree_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2332 if (token->opr.ctx_type == WORD_DELIM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2333 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2334 token->opr.ctx_type = WORD_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2335 tree_first = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2336 token->opr.ctx_type = WORD_LAST;
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2337 }
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2338 else
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2339 {
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2340 token->opr.ctx_type = INSIDE_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2341 tree_first = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2342 token->opr.ctx_type = INSIDE_NOTWORD;
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2343 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2344 tree_last = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2345 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2346 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2347 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2348 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2349 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2350 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2351 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2352 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2353 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2354 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2355 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2356 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2357 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2358 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2359 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2360 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2361 /* We must return here, since ANCHORs can't be followed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2362 by repetition operators.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2363 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2364 it must not be "<ANCHOR(^)><REPEAT(*)>". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2365 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2366 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2367 case OP_PERIOD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2368 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2369 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2370 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2371 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2372 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2373 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2374 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2375 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2376 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2377 case OP_WORD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2378 case OP_NOTWORD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2379 tree = build_charclass_op (dfa, regexp->trans,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2380 (const unsigned char *) "alnum",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2381 (const unsigned char *) "_",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2382 token->type == OP_NOTWORD, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2383 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2384 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2385 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2386 case OP_SPACE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2387 case OP_NOTSPACE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2388 tree = build_charclass_op (dfa, regexp->trans,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2389 (const unsigned char *) "space",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2390 (const unsigned char *) "",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2391 token->type == OP_NOTSPACE, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2392 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2393 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2394 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2395 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2396 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2397 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2398 case BACK_SLASH:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2399 *err = REG_EESCAPE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2400 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2401 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2402 /* Must not happen? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2403 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2404 assert (0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2405 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2406 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2407 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2408 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2409
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2410 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2411 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2412 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2413 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2414 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2415 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2416 /* In BRE consecutive duplications are not allowed. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2417 if ((syntax & RE_CONTEXT_INVALID_DUP)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2418 && (token->type == OP_DUP_ASTERISK
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2419 || token->type == OP_OPEN_DUP_NUM))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2420 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2421 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2422 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2423 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2424 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2425
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2426 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2427 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2428
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2429 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2430 (<reg_exp>):
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2431 SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2432 |
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2433 <reg_exp>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2434 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2435
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2436 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2437 parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2438 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2439 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2440 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2441 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2442 size_t cur_nsub;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2443 cur_nsub = preg->re_nsub++;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2444
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2445 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2446
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2447 /* The subexpression may be a null string. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2448 if (token->type == OP_CLOSE_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2449 tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2450 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2451 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2452 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2453 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2454 *err = REG_EPAREN;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2455 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2456 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2457 }
6171
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2458
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2459 if (cur_nsub <= '9' - '1')
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2460 dfa->completed_bkref_map |= 1 << cur_nsub;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2461
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2462 tree = create_tree (dfa, tree, NULL, SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2463 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2464 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2465 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2466 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2467 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2468 tree->token.opr.idx = cur_nsub;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2469 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2470 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2471
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2472 /* This function parse repetition operators like "*", "+", "{1,3}" etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2473
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2474 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2475 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2476 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2477 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2478 bin_tree_t *tree = NULL, *old_tree = NULL;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2479 Idx i, start, end, start_idx = re_string_cur_idx (regexp);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2480 re_token_t start_token = *token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2481
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2482 if (token->type == OP_OPEN_DUP_NUM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2483 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2484 end = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2485 start = fetch_number (regexp, token, syntax);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2486 if (start == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2487 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2488 if (token->type == CHARACTER && token->opr.c == ',')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2489 start = 0; /* We treat "{,m}" as "{0,m}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2490 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2491 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2492 *err = REG_BADBR; /* <re>{} is invalid. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2493 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2494 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2495 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2496 if (BE (start != REG_ERROR, 1))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2497 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2498 /* We treat "{n}" as "{n,n}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2499 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2500 : ((token->type == CHARACTER && token->opr.c == ',')
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2501 ? fetch_number (regexp, token, syntax) : REG_ERROR));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2502 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2503 if (BE (start == REG_ERROR || end == REG_ERROR, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2504 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2505 /* Invalid sequence. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2506 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2507 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2508 if (token->type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2509 *err = REG_EBRACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2510 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2511 *err = REG_BADBR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2512
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2513 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2514 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2515
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2516 /* If the syntax bit is set, rollback. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2517 re_string_set_index (regexp, start_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2518 *token = start_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2519 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2520 /* mb_partial and word_char bits should be already initialized by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2521 peek_token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2522 return elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2523 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2524
12570
3ed9d84fee81 regcomp: recognize ill-formed { } expressions
Ulrich Drepper <drepper@redhat.com>
parents: 12569
diff changeset
2525 if (BE ((end != REG_MISSING && start > end)
3ed9d84fee81 regcomp: recognize ill-formed { } expressions
Ulrich Drepper <drepper@redhat.com>
parents: 12569
diff changeset
2526 || token->type != OP_CLOSE_DUP_NUM, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2527 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2528 /* First number greater than second. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2529 *err = REG_BADBR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2530 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2531 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2532 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2533 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2534 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2535 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2536 end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2537 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2538
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2539 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2540
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2541 if (BE (elem == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2542 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2543 if (BE (start == 0 && end == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2544 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2545 postorder (elem, free_tree, NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2546 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2547 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2548
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2549 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2550 if (BE (start > 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2551 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2552 tree = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2553 for (i = 2; i <= start; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2554 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2555 elem = duplicate_tree (elem, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2556 tree = create_tree (dfa, tree, elem, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2557 if (BE (elem == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2558 goto parse_dup_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2559 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2560
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2561 if (start == end)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2562 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2563
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2564 /* Duplicate ELEM before it is marked optional. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2565 elem = duplicate_tree (elem, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2566 old_tree = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2567 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2568 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2569 old_tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2570
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2571 if (elem->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2572 postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2573
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2574 tree = create_tree (dfa, elem, NULL,
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2575 (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2576 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2577 goto parse_dup_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2578
12848
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2579 /* From gnulib's "intprops.h":
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2580 True if the arithmetic type T is signed. */
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2581 #define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2582
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2583 /* This loop is actually executed only when end != REG_MISSING,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2584 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2585 already created the start+1-th copy. */
12848
69c1f6834276 regcomp.c: avoid the sole warning from gcc's -Wtype-limits
Jim Meyering <meyering@redhat.com>
parents: 12847
diff changeset
2586 if (TYPE_SIGNED (Idx) || end != REG_MISSING)
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2587 for (i = start + 2; i <= end; ++i)
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2588 {
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2589 elem = duplicate_tree (elem, dfa);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2590 tree = create_tree (dfa, tree, elem, CONCAT);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2591 if (BE (elem == NULL || tree == NULL, 0))
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2592 goto parse_dup_op_espace;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2593
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2594 tree = create_tree (dfa, tree, NULL, OP_ALT);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2595 if (BE (tree == NULL, 0))
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2596 goto parse_dup_op_espace;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2597 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2598
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2599 if (old_tree)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2600 tree = create_tree (dfa, old_tree, tree, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2601
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2602 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2603
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2604 parse_dup_op_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2605 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2606 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2607 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2608
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2609 /* Size of the names for collating symbol/equivalence_class/character_class.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2610 I'm not sure, but maybe enough. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2611 #define BRACKET_NAME_BUF_SIZE 32
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2612
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2613 #ifndef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2614 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2615 Build the range expression which starts from START_ELEM, and ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2616 at END_ELEM. The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2617 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2618 mbcset->range_ends, is a pointer argument sinse we may
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2619 update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2620
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2621 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2622 internal_function
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2623 # ifdef RE_ENABLE_I18N
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2624 build_range_exp (const reg_syntax_t syntax,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2625 bitset_t sbcset,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2626 re_charset_t *mbcset,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2627 Idx *range_alloc,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2628 const bracket_elem_t *start_elem,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2629 const bracket_elem_t *end_elem)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2630 # else /* not RE_ENABLE_I18N */
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2631 build_range_exp (const reg_syntax_t syntax,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2632 bitset_t sbcset,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2633 const bracket_elem_t *start_elem,
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2634 const bracket_elem_t *end_elem)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2635 # endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2636 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2637 unsigned int start_ch, end_ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2638 /* Equivalence Classes and Character Classes can't be a range start/end. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2639 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2640 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2641 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2642 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2643
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2644 /* We can handle no multi character collating elements without libc
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2645 support. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2646 if (BE ((start_elem->type == COLL_SYM
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2647 && strlen ((char *) start_elem->opr.name) > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2648 || (end_elem->type == COLL_SYM
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2649 && strlen ((char *) end_elem->opr.name) > 1), 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2650 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2651
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2652 # ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2653 {
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
2654 wchar_t wc;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2655 wint_t start_wc;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2656 wint_t end_wc;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2657 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2658
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2659 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2660 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2661 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2662 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2663 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2664 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2665 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2666 ? __btowc (start_ch) : start_elem->opr.wch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2667 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2668 ? __btowc (end_ch) : end_elem->opr.wch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2669 if (start_wc == WEOF || end_wc == WEOF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2670 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2671 cmp_buf[0] = start_wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2672 cmp_buf[4] = end_wc;
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2673
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2674 if (BE ((syntax & RE_NO_EMPTY_RANGES)
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
2675 && wcscoll (cmp_buf, cmp_buf + 4) > 0, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2676 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2677
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2678 /* Got valid collation sequence values, add them as a new entry.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2679 However, for !_LIBC we have no collation elements: if the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2680 character set is single byte, the single byte character set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2681 that we build below suffices. parse_bracket_exp passes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2682 no MBCSET if dfa->mb_cur_max == 1. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2683 if (mbcset)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2684 {
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2685 /* Check the space of the arrays. */
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2686 if (BE (*range_alloc == mbcset->nranges, 0))
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2687 {
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2688 /* There is not enough space, need realloc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2689 wchar_t *new_array_start, *new_array_end;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2690 Idx new_nranges;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2691
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2692 /* +1 in case of mbcset->nranges is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2693 new_nranges = 2 * mbcset->nranges + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2694 /* Use realloc since mbcset->range_starts and mbcset->range_ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2695 are NULL if *range_alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2696 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2697 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2698 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2699 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2700
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2701 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2702 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2703
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2704 mbcset->range_starts = new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2705 mbcset->range_ends = new_array_end;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2706 *range_alloc = new_nranges;
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2707 }
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2708
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2709 mbcset->range_starts[mbcset->nranges] = start_wc;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2710 mbcset->range_ends[mbcset->nranges++] = end_wc;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2711 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2712
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2713 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2714 for (wc = 0; wc < SBC_MAX; ++wc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2715 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2716 cmp_buf[2] = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2717 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2718 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2719 bitset_set (sbcset, wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2720 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2721 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2722 # else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2723 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2724 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2725 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2726 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2727 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2728 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2729 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2730 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2731 if (start_ch > end_ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2732 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2733 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2734 for (ch = 0; ch < SBC_MAX; ++ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2735 if (start_ch <= ch && ch <= end_ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2736 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2737 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2738 # endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2739 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2740 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2741 #endif /* not _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2742
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2743 #ifndef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2744 /* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2745 Build the collating element which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2746 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2747 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2748 pointer argument since we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2749
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2750 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2751 internal_function
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2752 build_collating_symbol (bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2753 # ifdef RE_ENABLE_I18N
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2754 re_charset_t *mbcset, Idx *coll_sym_alloc,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2755 # endif
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2756 const unsigned char *name)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2757 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2758 size_t name_len = strlen ((const char *) name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2759 if (BE (name_len != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2760 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2761 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2762 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2763 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2764 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2765 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2766 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2767 #endif /* not _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2768
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2769 /* This function parse bracket expression like "[abc]", "[a-c]",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2770 "[[.a-a.]]" etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2771
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2772 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2773 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2774 reg_syntax_t syntax, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2775 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2776 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2777 const unsigned char *collseqmb;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2778 const char *collseqwc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2779 uint32_t nrules;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2780 int32_t table_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2781 const int32_t *symb_table;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2782 const unsigned char *extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2783
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2784 /* Local function for parse_bracket_exp used in _LIBC environement.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2785 Seek the collating symbol entry correspondings to NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2786 Return the index of the symbol in the SYMB_TABLE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2787
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2788 auto inline int32_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2789 __attribute ((always_inline))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2790 seek_collating_symbol_entry (name, name_len)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2791 const unsigned char *name;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2792 size_t name_len;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2793 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2794 int32_t hash = elem_hash ((const char *) name, name_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2795 int32_t elem = hash % table_size;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2796 if (symb_table[2 * elem] != 0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2797 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2798 int32_t second = hash % (table_size - 2) + 1;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2799
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2800 do
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2801 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2802 /* First compare the hashing value. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2803 if (symb_table[2 * elem] == hash
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2804 /* Compare the length of the name. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2805 && name_len == extra[symb_table[2 * elem + 1]]
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2806 /* Compare the name. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2807 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2808 name_len) == 0)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2809 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2810 /* Yep, this is the entry. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2811 break;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2812 }
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2813
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2814 /* Next entry. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2815 elem += second;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2816 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2817 while (symb_table[2 * elem] != 0);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2818 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2819 return elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2820 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2821
12571
64d47f001127 regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents: 12570
diff changeset
2822 /* Local function for parse_bracket_exp used in _LIBC environment.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2823 Look up the collation sequence value of BR_ELEM.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2824 Return the value if succeeded, UINT_MAX otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2825
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2826 auto inline unsigned int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2827 __attribute ((always_inline))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2828 lookup_collation_sequence_value (br_elem)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2829 bracket_elem_t *br_elem;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2830 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2831 if (br_elem->type == SB_CHAR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2832 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2833 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2834 if (MB_CUR_MAX == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2835 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2836 if (nrules == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2837 return collseqmb[br_elem->opr.ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2838 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2839 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2840 wint_t wc = __btowc (br_elem->opr.ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2841 return __collseq_table_lookup (collseqwc, wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2842 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2843 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2844 else if (br_elem->type == MB_CHAR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2845 {
12571
64d47f001127 regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents: 12570
diff changeset
2846 if (nrules != 0)
64d47f001127 regcomp: skip collseq lookup when there are no rules
Ulrich Drepper <drepper@redhat.com>
parents: 12570
diff changeset
2847 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2848 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2849 else if (br_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2850 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2851 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2852 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2853 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2854 int32_t elem, idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2855 elem = seek_collating_symbol_entry (br_elem->opr.name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2856 sym_name_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2857 if (symb_table[2 * elem] != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2858 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2859 /* We found the entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2860 idx = symb_table[2 * elem + 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2861 /* Skip the name of collating element name. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2862 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2863 /* Skip the byte sequence of the collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2864 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2865 /* Adjust for the alignment. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2866 idx = (idx + 3) & ~3;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2867 /* Skip the multibyte collation sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2868 idx += sizeof (unsigned int);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2869 /* Skip the wide char sequence of the collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2870 idx += sizeof (unsigned int) *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2871 (1 + *(unsigned int *) (extra + idx));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2872 /* Return the collation sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2873 return *(unsigned int *) (extra + idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2874 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2875 else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2876 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2877 /* No valid character. Match it as a single byte
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2878 character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2879 return collseqmb[br_elem->opr.name[0]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2880 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2881 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2882 else if (sym_name_len == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2883 return collseqmb[br_elem->opr.name[0]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2884 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2885 return UINT_MAX;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2886 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2887
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2888 /* Local function for parse_bracket_exp used in _LIBC environement.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2889 Build the range expression which starts from START_ELEM, and ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2890 at END_ELEM. The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2891 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2892 mbcset->range_ends, is a pointer argument sinse we may
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2893 update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2894
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2895 auto inline reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2896 __attribute ((always_inline))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2897 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2898 re_charset_t *mbcset;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2899 Idx *range_alloc;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2900 bitset_t sbcset;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2901 bracket_elem_t *start_elem, *end_elem;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2902 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2903 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2904 uint32_t start_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2905 uint32_t end_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2906
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2907 /* Equivalence Classes and Character Classes can't be a range
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2908 start/end. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2909 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2910 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2911 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2912 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2913
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2914 start_collseq = lookup_collation_sequence_value (start_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2915 end_collseq = lookup_collation_sequence_value (end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2916 /* Check start/end collation sequence values. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2917 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2918 return REG_ECOLLATE;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2919 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2920 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2921
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2922 /* Got valid collation sequence values, add them as a new entry.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2923 However, if we have no collation elements, and the character set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2924 is single byte, the single byte character set that we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2925 build below suffices. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2926 if (nrules > 0 || dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2927 {
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2928 /* Check the space of the arrays. */
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2929 if (BE (*range_alloc == mbcset->nranges, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2930 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2931 /* There is not enough space, need realloc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2932 uint32_t *new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2933 uint32_t *new_array_end;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2934 Idx new_nranges;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2935
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2936 /* +1 in case of mbcset->nranges is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2937 new_nranges = 2 * mbcset->nranges + 1;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2938 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2939 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2940 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2941 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2942
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2943 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2944 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2945
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2946 mbcset->range_starts = new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2947 mbcset->range_ends = new_array_end;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2948 *range_alloc = new_nranges;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2949 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2950
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2951 mbcset->range_starts[mbcset->nranges] = start_collseq;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
2952 mbcset->range_ends[mbcset->nranges++] = end_collseq;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2953 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2954
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2955 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2956 for (ch = 0; ch < SBC_MAX; ch++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2957 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2958 uint32_t ch_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2959 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2960 if (MB_CUR_MAX == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2961 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2962 if (nrules == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2963 ch_collseq = collseqmb[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2964 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2965 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2966 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2967 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2968 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2969 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2970 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2971
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2972 /* Local function for parse_bracket_exp used in _LIBC environement.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2973 Build the collating element which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2974 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2975 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2976 pointer argument sinse we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2977
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2978 auto inline reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2979 __attribute ((always_inline))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2980 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2981 re_charset_t *mbcset;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2982 Idx *coll_sym_alloc;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2983 bitset_t sbcset;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2984 const unsigned char *name;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2985 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2986 int32_t elem, idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2987 size_t name_len = strlen ((const char *) name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2988 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2989 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2990 elem = seek_collating_symbol_entry (name, name_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2991 if (symb_table[2 * elem] != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2992 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2993 /* We found the entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2994 idx = symb_table[2 * elem + 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2995 /* Skip the name of collating element name. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2996 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2997 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2998 else if (symb_table[2 * elem] == 0 && name_len == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2999 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3000 /* No valid character, treat it as a normal
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3001 character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3002 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3003 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3004 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3005 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3006 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3007
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3008 /* Got valid collation sequence, add it as a new entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3009 /* Check the space of the arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3010 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3011 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3012 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3013 /* +1 in case of mbcset->ncoll_syms is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3014 Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3015 /* Use realloc since mbcset->coll_syms is NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3016 if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3017 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3018 new_coll_sym_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3019 if (BE (new_coll_syms == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3020 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3021 mbcset->coll_syms = new_coll_syms;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3022 *coll_sym_alloc = new_coll_sym_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3023 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3024 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3025 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3026 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3027 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3028 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3029 if (BE (name_len != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3030 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3031 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3032 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3033 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3034 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3035 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3036 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3037 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3038 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3039
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3040 re_token_t br_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3041 re_bitset_ptr_t sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3042 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3043 re_charset_t *mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3044 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3045 Idx equiv_class_alloc = 0, char_class_alloc = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3046 #endif /* not RE_ENABLE_I18N */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3047 bool non_match = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3048 bin_tree_t *work_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3049 int token_len;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3050 bool first_round = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3051 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3052 collseqmb = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3053 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3054 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3055 if (nrules)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3056 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3057 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3058 if (MB_CUR_MAX > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3059 */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3060 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3061 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3062 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3063 _NL_COLLATE_SYMB_TABLEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3064 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3065 _NL_COLLATE_SYMB_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3066 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3067 #endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3068 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3069 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3070 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3071 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3072 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3073 if (BE (sbcset == NULL || mbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3074 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3075 if (BE (sbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3076 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3077 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3078 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3079 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3080 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3081
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3082 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3083 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3084 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3085 *err = REG_BADPAT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3086 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3087 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3088 if (token->type == OP_NON_MATCH_LIST)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3089 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3090 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3091 mbcset->non_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3092 #endif /* not RE_ENABLE_I18N */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3093 non_match = true;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3094 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
8110
8428e492271f 2007-02-05 Paolo Bonzini <bonzini@gnu.org>
Paolo Bonzini <bonzini@gnu.org>
parents: 8073
diff changeset
3095 bitset_set (sbcset, '\n');
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3096 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3097 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3098 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3099 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3100 *err = REG_BADPAT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3101 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3102 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3103 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3104
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3105 /* We treat the first ']' as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3106 if (token->type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3107 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3108
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3109 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3110 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3111 bracket_elem_t start_elem, end_elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3112 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3113 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3114 reg_errcode_t ret;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3115 int token_len2 = 0;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3116 bool is_range_exp = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3117 re_token_t token2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3118
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3119 start_elem.opr.name = start_name_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3120 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3121 syntax, first_round);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3122 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3123 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3124 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3125 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3126 }
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3127 first_round = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3128
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3129 /* Get information about the next token. We need it in any case. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3130 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3131
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3132 /* Do not check for ranges if we know they are not allowed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3133 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3134 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3135 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3136 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3137 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3138 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3139 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3140 if (token->type == OP_CHARSET_RANGE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3141 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3142 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3143 token_len2 = peek_token_bracket (&token2, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3144 if (BE (token2.type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3145 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3146 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3147 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3148 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3149 if (token2.type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3150 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3151 /* We treat the last '-' as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3152 re_string_skip_bytes (regexp, -token_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3153 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3154 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3155 else
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3156 is_range_exp = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3157 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3158 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3159
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3160 if (is_range_exp == true)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3161 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3162 end_elem.opr.name = end_name_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3163 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3164 dfa, syntax, true);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3165 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3166 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3167 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3168 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3169 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3170
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3171 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3172
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3173 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3174 *err = build_range_exp (sbcset, mbcset, &range_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3175 &start_elem, &end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3176 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3177 # ifdef RE_ENABLE_I18N
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
3178 *err = build_range_exp (syntax, sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3179 dfa->mb_cur_max > 1 ? mbcset : NULL,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3180 &range_alloc, &start_elem, &end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3181 # else
13014
6240e99d3e0c regcomp.c: make non-_LIBC implementation of build_range_exp consistent
Jim Meyering <meyering@redhat.com>
parents: 12848
diff changeset
3182 *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3183 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3184 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3185 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3186 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3187 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3188 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3189 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3190 switch (start_elem.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3191 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3192 case SB_CHAR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3193 bitset_set (sbcset, start_elem.opr.ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3194 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3195 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3196 case MB_CHAR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3197 /* Check whether the array has enough space. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3198 if (BE (mbchar_alloc == mbcset->nmbchars, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3199 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3200 wchar_t *new_mbchars;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3201 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3202 /* +1 in case of mbcset->nmbchars is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3203 mbchar_alloc = 2 * mbcset->nmbchars + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3204 /* Use realloc since array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3205 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3206 mbchar_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3207 if (BE (new_mbchars == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3208 goto parse_bracket_exp_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3209 mbcset->mbchars = new_mbchars;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3210 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3211 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3212 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3213 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3214 case EQUIV_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3215 *err = build_equiv_class (sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3216 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3217 mbcset, &equiv_class_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3218 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3219 start_elem.opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3220 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3221 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3222 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3223 case COLL_SYM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3224 *err = build_collating_symbol (sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3225 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3226 mbcset, &coll_sym_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3227 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3228 start_elem.opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3229 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3230 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3231 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3232 case CHAR_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3233 *err = build_charclass (regexp->trans, sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3234 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3235 mbcset, &char_class_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3236 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3237 start_elem.opr.name, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3238 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3239 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3240 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3241 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3242 assert (0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3243 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3244 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3245 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3246 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3247 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3248 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3249 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3250 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3251 if (token->type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3252 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3253 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3254
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3255 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3256
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3257 /* If it is non-matching list. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3258 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3259 bitset_not (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3260
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3261 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3262 /* Ensure only single byte characters are set. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3263 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3264 bitset_mask (sbcset, dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3265
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3266 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3267 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3268 || mbcset->non_match)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3269 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3270 bin_tree_t *mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3271 int sbc_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3272 /* Build a tree for complex bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3273 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3274 br_token.type = COMPLEX_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3275 br_token.opr.mbcset = mbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3276 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3277 if (BE (mbc_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3278 goto parse_bracket_exp_espace;
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
3279 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3280 if (sbcset[sbc_idx])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3281 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3282 /* If there are no bits set in sbcset, there is no point
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3283 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
3284 if (sbc_idx < BITSET_WORDS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3285 {
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3286 /* Build a tree for simple bracket. */
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3287 br_token.type = SIMPLE_BRACKET;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3288 br_token.opr.sbcset = sbcset;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3289 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3290 if (BE (work_tree == NULL, 0))
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3291 goto parse_bracket_exp_espace;
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3292
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3293 /* Then join them by ALT node. */
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3294 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3295 if (BE (work_tree == NULL, 0))
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3296 goto parse_bracket_exp_espace;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3297 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3298 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3299 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3300 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3301 work_tree = mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3302 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3303 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3304 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3305 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3306 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3307 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3308 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3309 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3310 /* Build a tree for simple bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3311 br_token.type = SIMPLE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3312 br_token.opr.sbcset = sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3313 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3314 if (BE (work_tree == NULL, 0))
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3315 goto parse_bracket_exp_espace;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3316 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3317 return work_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3318
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3319 parse_bracket_exp_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3320 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3321 parse_bracket_exp_free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3322 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3323 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3324 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3325 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3326 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3327 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3328
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3329 /* Parse an element in the bracket expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3330
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3331 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3332 parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3333 re_token_t *token, int token_len, re_dfa_t *dfa,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3334 reg_syntax_t syntax, bool accept_hyphen)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3335 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3336 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3337 int cur_char_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3338 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3339 if (cur_char_size > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3340 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3341 elem->type = MB_CHAR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3342 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3343 re_string_skip_bytes (regexp, cur_char_size);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3344 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3345 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3346 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3347 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3348 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3349 || token->type == OP_OPEN_EQUIV_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3350 return parse_bracket_symbol (elem, regexp, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3351 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3352 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3353 /* A '-' must only appear as anything but a range indicator before
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3354 the closing bracket. Everything else is an error. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3355 re_token_t token2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3356 (void) peek_token_bracket (&token2, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3357 if (token2.type != OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3358 /* The actual error value is not standardized since this whole
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3359 case is undefined. But ERANGE makes good sense. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3360 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3361 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3362 elem->type = SB_CHAR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3363 elem->opr.ch = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3364 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3365 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3366
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3367 /* Parse a bracket symbol in the bracket expression. Bracket symbols are
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3368 such as [:<character_class>:], [.<collating_element>.], and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3369 [=<equivalent_class>=]. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3370
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3371 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3372 parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3373 re_token_t *token)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3374 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3375 unsigned char ch, delim = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3376 int i = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3377 if (re_string_eoi(regexp))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3378 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3379 for (;; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3380 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3381 if (i >= BRACKET_NAME_BUF_SIZE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3382 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3383 if (token->type == OP_OPEN_CHAR_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3384 ch = re_string_fetch_byte_case (regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3385 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3386 ch = re_string_fetch_byte (regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3387 if (re_string_eoi(regexp))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3388 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3389 if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3390 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3391 elem->opr.name[i] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3392 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3393 re_string_skip_bytes (regexp, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3394 elem->opr.name[i] = '\0';
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3395 switch (token->type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3396 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3397 case OP_OPEN_COLL_ELEM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3398 elem->type = COLL_SYM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3399 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3400 case OP_OPEN_EQUIV_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3401 elem->type = EQUIV_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3402 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3403 case OP_OPEN_CHAR_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3404 elem->type = CHAR_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3405 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3406 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3407 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3408 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3409 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3410 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3411
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3412 /* Helper function for parse_bracket_exp.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3413 Build the equivalence class which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3414 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3415 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3416 is a pointer argument sinse we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3417
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3418 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3419 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3420 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3421 Idx *equiv_class_alloc, const unsigned char *name)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3422 #else /* not RE_ENABLE_I18N */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3423 build_equiv_class (bitset_t sbcset, const unsigned char *name)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3424 #endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3425 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3426 #ifdef _LIBC
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3427 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3428 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3429 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3430 const int32_t *table, *indirect;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3431 const unsigned char *weights, *extra, *cp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3432 unsigned char char_buf[2];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3433 int32_t idx1, idx2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3434 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3435 size_t len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3436 /* This #include defines a local function! */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3437 # include <locale/weight.h>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3438 /* Calculate the index for equivalence class. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3439 cp = name;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3440 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3441 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3442 _NL_COLLATE_WEIGHTMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3443 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3444 _NL_COLLATE_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3445 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3446 _NL_COLLATE_INDIRECTMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3447 idx1 = findidx (&cp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3448 if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3449 /* This isn't a valid character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3450 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3451
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3452 /* Build single byte matcing table for this equivalence class. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3453 char_buf[1] = (unsigned char) '\0';
12572
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3454 len = weights[idx1 & 0xffffff];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3455 for (ch = 0; ch < SBC_MAX; ++ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3456 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3457 char_buf[0] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3458 cp = char_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3459 idx2 = findidx (&cp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3460 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3461 idx2 = table[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3462 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3463 if (idx2 == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3464 /* This isn't a valid character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3465 continue;
12572
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3466 /* Compare only if the length matches and the collation rule
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3467 index is the same. */
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3468 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3469 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3470 int cnt = 0;
12572
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3471
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3472 while (cnt <= len &&
12572
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3473 weights[(idx1 & 0xffffff) + 1 + cnt]
b11c0a312a68 regcomp, regexec, fnmatch: avoid array bounds read error
Ulrich Drepper <drepper@redhat.com>
parents: 12571
diff changeset
3474 == weights[(idx2 & 0xffffff) + 1 + cnt])
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3475 ++cnt;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3476
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3477 if (cnt > len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3478 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3479 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3480 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3481 /* Check whether the array has enough space. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3482 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3483 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3484 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3485 /* +1 in case of mbcset->nequiv_classes is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3486 Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3487 /* Use realloc since the array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3488 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3489 int32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3490 new_equiv_class_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3491 if (BE (new_equiv_classes == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3492 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3493 mbcset->equiv_classes = new_equiv_classes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3494 *equiv_class_alloc = new_equiv_class_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3495 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3496 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3497 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3498 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3499 #endif /* _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3500 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3501 if (BE (strlen ((const char *) name) != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3502 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3503 bitset_set (sbcset, *name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3504 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3505 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3506 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3507
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3508 /* Helper function for parse_bracket_exp.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3509 Build the character class which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3510 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3511 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3512 is a pointer argument sinse we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3513
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3514 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3515 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3516 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3517 re_charset_t *mbcset, Idx *char_class_alloc,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3518 const unsigned char *class_name, reg_syntax_t syntax)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3519 #else /* not RE_ENABLE_I18N */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3520 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3521 const unsigned char *class_name, reg_syntax_t syntax)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3522 #endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3523 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3524 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3525 const char *name = (const char *) class_name;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3526
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3527 /* In case of REG_ICASE "upper" and "lower" match the both of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3528 upper and lower cases. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3529 if ((syntax & RE_ICASE)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3530 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3531 name = "alpha";
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3532
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3533 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3534 /* Check the space of the arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3535 if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3536 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3537 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3538 /* +1 in case of mbcset->nchar_classes is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3539 Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3540 /* Use realloc since array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3541 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3542 new_char_class_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3543 if (BE (new_char_classes == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3544 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3545 mbcset->char_classes = new_char_classes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3546 *char_class_alloc = new_char_class_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3547 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3548 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3549 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3550
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3551 #define BUILD_CHARCLASS_LOOP(ctype_func) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3552 do { \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3553 if (BE (trans != NULL, 0)) \
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3554 { \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3555 for (i = 0; i < SBC_MAX; ++i) \
6729
c5495b5c3f32 Fix space-tab problem. From Jim Meyering.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
3556 if (ctype_func (i)) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3557 bitset_set (sbcset, trans[i]); \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3558 } \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3559 else \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3560 { \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3561 for (i = 0; i < SBC_MAX; ++i) \
6729
c5495b5c3f32 Fix space-tab problem. From Jim Meyering.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
3562 if (ctype_func (i)) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3563 bitset_set (sbcset, i); \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3564 } \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3565 } while (0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3566
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3567 if (strcmp (name, "alnum") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3568 BUILD_CHARCLASS_LOOP (isalnum);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3569 else if (strcmp (name, "cntrl") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3570 BUILD_CHARCLASS_LOOP (iscntrl);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3571 else if (strcmp (name, "lower") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3572 BUILD_CHARCLASS_LOOP (islower);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3573 else if (strcmp (name, "space") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3574 BUILD_CHARCLASS_LOOP (isspace);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3575 else if (strcmp (name, "alpha") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3576 BUILD_CHARCLASS_LOOP (isalpha);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3577 else if (strcmp (name, "digit") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3578 BUILD_CHARCLASS_LOOP (isdigit);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3579 else if (strcmp (name, "print") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3580 BUILD_CHARCLASS_LOOP (isprint);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3581 else if (strcmp (name, "upper") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3582 BUILD_CHARCLASS_LOOP (isupper);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3583 else if (strcmp (name, "blank") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3584 BUILD_CHARCLASS_LOOP (isblank);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3585 else if (strcmp (name, "graph") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3586 BUILD_CHARCLASS_LOOP (isgraph);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3587 else if (strcmp (name, "punct") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3588 BUILD_CHARCLASS_LOOP (ispunct);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3589 else if (strcmp (name, "xdigit") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3590 BUILD_CHARCLASS_LOOP (isxdigit);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3591 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3592 return REG_ECTYPE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3593
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3594 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3595 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3596
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3597 static bin_tree_t *
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3598 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3599 const unsigned char *class_name,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3600 const unsigned char *extra, bool non_match,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3601 reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3602 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3603 re_bitset_ptr_t sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3604 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3605 re_charset_t *mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3606 Idx alloc = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3607 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3608 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3609 re_token_t br_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3610 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3611
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3612 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3613 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3614 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3615 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3616
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3617 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3618 if (BE (sbcset == NULL || mbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3619 #else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3620 if (BE (sbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3621 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3622 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3623 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3624 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3625 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3626
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3627 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3628 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3629 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3630 mbcset->non_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3631 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3632 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3633
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3634 /* We don't care the syntax in this case. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3635 ret = build_charclass (trans, sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3636 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3637 mbcset, &alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3638 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3639 class_name, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3640
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3641 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3642 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3643 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3644 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3645 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3646 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3647 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3648 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3649 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3650 /* \w match '_' also. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3651 for (; *extra; extra++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3652 bitset_set (sbcset, *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3653
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3654 /* If it is non-matching list. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3655 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3656 bitset_not (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3657
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3658 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3659 /* Ensure only single byte characters are set. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3660 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3661 bitset_mask (sbcset, dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3662 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3663
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3664 /* Build a tree for simple bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3665 br_token.type = SIMPLE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3666 br_token.opr.sbcset = sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3667 tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3668 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3669 goto build_word_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3670
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3671 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3672 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3673 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3674 bin_tree_t *mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3675 /* Build a tree for complex bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3676 br_token.type = COMPLEX_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3677 br_token.opr.mbcset = mbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3678 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3679 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3680 if (BE (mbc_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3681 goto build_word_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3682 /* Then join them by ALT node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3683 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3684 if (BE (mbc_tree != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3685 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3686 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3687 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3688 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3689 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3690 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3691 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3692 #else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3693 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3694 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3695
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3696 build_word_op_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3697 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3698 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3699 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3700 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3701 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3702 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3703 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3704
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3705 /* This is intended for the expressions like "a{1,3}".
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
3706 Fetch a number from 'input', and return the number.
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3707 Return REG_MISSING if the number field is empty like "{,1}".
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3708 Return REG_ERROR if an error occurred. */
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3709
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3710 static Idx
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3711 fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3712 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3713 Idx num = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3714 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3715 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3716 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3717 fetch_token (token, input, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3718 c = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3719 if (BE (token->type == END_OF_RE, 0))
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3720 return REG_ERROR;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3721 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3722 break;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3723 num = ((token->type != CHARACTER || c < '0' || '9' < c
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3724 || num == REG_ERROR)
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3725 ? REG_ERROR
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3726 : ((num == REG_MISSING) ? c - '0' : num * 10 + c - '0'));
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3727 num = (num > RE_DUP_MAX) ? REG_ERROR : num;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3728 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3729 return num;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3730 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3731
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3732 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3733 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3734 free_charset (re_charset_t *cset)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3735 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3736 re_free (cset->mbchars);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3737 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3738 re_free (cset->coll_syms);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3739 re_free (cset->equiv_classes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3740 re_free (cset->range_starts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3741 re_free (cset->range_ends);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3742 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3743 re_free (cset->char_classes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3744 re_free (cset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3745 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3746 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3747
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3748 /* Functions for binary tree operation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3749
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3750 /* Create a tree node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3751
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3752 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3753 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3754 re_token_type_t type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3755 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3756 re_token_t t;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3757 t.type = type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3758 return create_token_tree (dfa, left, right, &t);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3759 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3760
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3761 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3762 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3763 const re_token_t *token)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3764 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3765 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3766 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3767 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3768 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3769
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3770 if (storage == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3771 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3772 storage->next = dfa->str_tree_storage;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3773 dfa->str_tree_storage = storage;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3774 dfa->str_tree_storage_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3775 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3776 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3777
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3778 tree->parent = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3779 tree->left = left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3780 tree->right = right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3781 tree->token = *token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3782 tree->token.duplicated = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3783 tree->token.opt_subexp = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3784 tree->first = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3785 tree->next = NULL;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3786 tree->node_idx = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3787
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3788 if (left != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3789 left->parent = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3790 if (right != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3791 right->parent = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3792 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3793 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3794
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3795 /* Mark the tree SRC as an optional subexpression.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3796 To be called from preorder or postorder. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3797
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3798 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3799 mark_opt_subexp (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3800 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3801 Idx idx = (Idx) (long) extra;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3802 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3803 node->token.opt_subexp = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3804
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3805 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3806 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3807
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3808 /* Free the allocated memory inside NODE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3809
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3810 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3811 free_token (re_token_t *node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3812 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3813 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3814 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3815 free_charset (node->opr.mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3816 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3817 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3818 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3819 re_free (node->opr.sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3820 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3821
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3822 /* Worker function for tree walking. Free the allocated memory inside NODE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3823 and its children. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3824
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3825 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3826 free_tree (void *extra, bin_tree_t *node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3827 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3828 free_token (&node->token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3829 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3830 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3831
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3832
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3833 /* Duplicate the node SRC, and return new node. This is a preorder
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3834 visit similar to the one implemented by the generic visitor, but
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3835 we need more infrastructure to maintain two parallel trees --- so,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3836 it's easier to duplicate. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3837
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3838 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3839 duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3840 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3841 const bin_tree_t *node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3842 bin_tree_t *dup_root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3843 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3844
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3845 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3846 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3847 /* Create a new tree and link it back to the current parent. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3848 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3849 if (*p_new == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3850 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3851 (*p_new)->parent = dup_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3852 (*p_new)->token.duplicated = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3853 dup_node = *p_new;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3854
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3855 /* Go to the left node, or up and to the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3856 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3857 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3858 node = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3859 p_new = &dup_node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3860 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3861 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3862 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3863 const bin_tree_t *prev = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3864 while (node->right == prev || node->right == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3865 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3866 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3867 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3868 dup_node = dup_node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3869 if (!node)
12830
b8269a183e3e regcomp.c: sync white-space changes from glibc
Jim Meyering <meyering@redhat.com>
parents: 12829
diff changeset
3870 return dup_root;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3871 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3872 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3873 p_new = &dup_node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3874 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3875 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3876 }