Mercurial > gnulib
annotate lib/dfa.c @ 40047:183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
v0.1-2213-gae4b73e28 caused a regression in grep-3.2 (no match):
echo '123-x'|LC_ALL=C grep -E '.\bx'
The goal is to revert the first, but reverting it requires restoring
the function deleted in the second. I ran this to restore the deleted
function:
git show v0.1-2281-g95cd86dd7 lib/dfa.c \
| perl -0777 -pe 's/^@@[^\n]*dfaan.*//ms' \
| patch -R -p1
* lib/dfa.c (charclass_context): Restore deleted function.
Reverting the primary commit removes this change:
dfa: Simplify a building state
* lib/dfa.c (build_state): Simplify a building state.
author | Jim Meyering <meyering@fb.com> |
---|---|
date | Thu, 20 Dec 2018 19:51:48 -0800 |
parents | c51e38088432 |
children | b06060465f09 |
rev | line source |
---|---|
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1 /* dfa.c - deterministic extended regexp routines for GNU |
19484
10eb9086bea0
maint: Run 'make update-copyright'
Paul Eggert <eggert@cs.ucla.edu>
parents:
18931
diff
changeset
|
2 Copyright (C) 1988, 1998, 2000, 2002, 2004-2005, 2007-2018 Free Software |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3 Foundation, Inc. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
5 This program is free software; you can redistribute it and/or modify |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
6 it under the terms of the GNU General Public License as published by |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
7 the Free Software Foundation; either version 3, or (at your option) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
8 any later version. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
9 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
10 This program is distributed in the hope that it will be useful, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
13 GNU General Public License for more details. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
14 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
15 You should have received a copy of the GNU General Public License |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
16 along with this program; if not, write to the Free Software |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
17 Foundation, Inc., |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
18 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
19 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
20 /* Written June, 1988 by Mike Haertel |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
21 Modified July, 1988 by Arthur David Olson to assist BMG speedups */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
22 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
23 #include <config.h> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
24 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
25 #include "dfa.h" |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
26 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
27 #include <assert.h> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
28 #include <ctype.h> |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
29 #include <stdint.h> |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
30 #include <stdio.h> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
31 #include <stdlib.h> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
32 #include <limits.h> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
33 #include <string.h> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
34 #include <locale.h> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
35 |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
36 static bool |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
37 streq (char const *a, char const *b) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
38 { |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
39 return strcmp (a, b) == 0; |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
40 } |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
41 |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
42 static bool |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
43 isasciidigit (char c) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
44 { |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
45 return '0' <= c && c <= '9'; |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
46 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
47 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
48 #include "gettext.h" |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
49 #define _(str) gettext (str) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
50 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
51 #include <wchar.h> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
52 |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
53 #include "intprops.h" |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
54 #include "xalloc.h" |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
55 #include "localeinfo.h" |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
56 |
18914
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
57 #ifndef FALLTHROUGH |
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
58 # if __GNUC__ < 7 |
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
59 # define FALLTHROUGH ((void) 0) |
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
60 # else |
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
61 # define FALLTHROUGH __attribute__ ((__fallthrough__)) |
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
62 # endif |
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
63 #endif |
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
64 |
18560
ac2082d27eed
dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents:
18559
diff
changeset
|
65 #ifndef MIN |
ac2082d27eed
dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents:
18559
diff
changeset
|
66 # define MIN(a,b) ((a) < (b) ? (a) : (b)) |
ac2082d27eed
dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents:
18559
diff
changeset
|
67 #endif |
ac2082d27eed
dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents:
18559
diff
changeset
|
68 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
69 /* HPUX defines these as macros in sys/param.h. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
70 #ifdef setbit |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
71 # undef setbit |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
72 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
73 #ifdef clrbit |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
74 # undef clrbit |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
75 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
76 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
77 /* First integer value that is greater than any character code. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
78 enum { NOTCHAR = 1 << CHAR_BIT }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
79 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
80 /* This represents part of a character class. It must be unsigned and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
81 at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
82 typedef unsigned long int charclass_word; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
83 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
84 /* CHARCLASS_WORD_BITS is the number of bits used in a charclass word. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
85 CHARCLASS_PAIR (LO, HI) is part of a charclass initializer, and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
86 represents 64 bits' worth of a charclass, where LO and HI are the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
87 low and high-order 32 bits of the 64-bit quantity. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
88 #if ULONG_MAX >> 31 >> 31 < 3 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
89 enum { CHARCLASS_WORD_BITS = 32 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
90 # define CHARCLASS_PAIR(lo, hi) lo, hi |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
91 #else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
92 enum { CHARCLASS_WORD_BITS = 64 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
93 # define CHARCLASS_PAIR(lo, hi) (((charclass_word) (hi) << 32) + (lo)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
94 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
95 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
96 /* An initializer for a charclass whose 32-bit words are A through H. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
97 #define CHARCLASS_INIT(a, b, c, d, e, f, g, h) \ |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
98 {{ \ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
99 CHARCLASS_PAIR (a, b), CHARCLASS_PAIR (c, d), \ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
100 CHARCLASS_PAIR (e, f), CHARCLASS_PAIR (g, h) \ |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
101 }} |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
102 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
103 /* The maximum useful value of a charclass_word; all used bits are 1. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
104 static charclass_word const CHARCLASS_WORD_MASK |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
105 = ((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
106 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
107 /* Number of words required to hold a bit for every character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
108 enum |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
109 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
110 CHARCLASS_WORDS = (NOTCHAR + CHARCLASS_WORD_BITS - 1) / CHARCLASS_WORD_BITS |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
111 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
112 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
113 /* Sets of unsigned characters are stored as bit vectors in arrays of charclass_word. */ |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
114 typedef struct { charclass_word w[CHARCLASS_WORDS]; } charclass; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
115 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
116 /* Convert a possibly-signed character to an unsigned character. This is |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
117 a bit safer than casting to unsigned char, since it catches some type |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
118 errors that the cast doesn't. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
119 static unsigned char |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
120 to_uchar (char ch) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
121 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
122 return ch; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
123 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
124 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
125 /* Contexts tell us whether a character is a newline or a word constituent. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
126 Word-constituent characters are those that satisfy iswalnum, plus '_'. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
127 Each character has a single CTX_* value; bitmasks of CTX_* values denote |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
128 a particular character class. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
129 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
130 A state also stores a context value, which is a bitmask of CTX_* values. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
131 A state's context represents a set of characters that the state's |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
132 predecessors must match. For example, a state whose context does not |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
133 include CTX_LETTER will never have transitions where the previous |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
134 character is a word constituent. A state whose context is CTX_ANY |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
135 might have transitions from any character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
136 |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
137 enum |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
138 { |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
139 CTX_NONE = 1, |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
140 CTX_LETTER = 2, |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
141 CTX_NEWLINE = 4, |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
142 CTX_ANY = 7 |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
143 }; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
144 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
145 /* Sometimes characters can only be matched depending on the surrounding |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
146 context. Such context decisions depend on what the previous character |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
147 was, and the value of the current (lookahead) character. Context |
18667
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
148 dependent constraints are encoded as 9-bit integers. Each bit that |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
149 is set indicates that the constraint succeeds in the corresponding |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
150 context. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
151 |
18667
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
152 bits 6-8 - valid previous-character contexts when next character is CTX_NEWLINE |
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
153 bits 3-5 - valid previous-character contexts when next character is CTX_LETTER |
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
154 bits 0-2 - valid previous-character contexts when next character is CTX_NONE |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
155 |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
156 succeeds_in_context determines whether a given constraint |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
157 succeeds in a particular context. Prev is a bitmask of possible |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
158 context values for the previous character, curr is the (single-bit) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
159 context value for the lookahead character. */ |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
160 static int |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
161 newline_constraint (int constraint) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
162 { |
18667
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
163 return (constraint >> 6) & 7; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
164 } |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
165 static int |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
166 letter_constraint (int constraint) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
167 { |
18667
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
168 return (constraint >> 3) & 7; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
169 } |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
170 static int |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
171 other_constraint (int constraint) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
172 { |
18667
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
173 return constraint & 7; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
174 } |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
175 |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
176 static bool |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
177 succeeds_in_context (int constraint, int prev, int curr) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
178 { |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
179 return !! (((curr & CTX_NONE ? other_constraint (constraint) : 0) \ |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
180 | (curr & CTX_LETTER ? letter_constraint (constraint) : 0) \ |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
181 | (curr & CTX_NEWLINE ? newline_constraint (constraint) : 0)) \ |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
182 & prev); |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
183 } |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
184 |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
185 /* The following describe what a constraint depends on. */ |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
186 static bool |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
187 prev_newline_dependent (int constraint) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
188 { |
18667
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
189 return ((constraint ^ constraint >> 2) & 0111) != 0; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
190 } |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
191 static bool |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
192 prev_letter_dependent (int constraint) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
193 { |
18667
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
194 return ((constraint ^ constraint >> 1) & 0111) != 0; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
195 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
196 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
197 /* Tokens that match the empty string subject to some constraint actually |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
198 work by applying that constraint to determine what may follow them, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
199 taking into account what has gone before. The following values are |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
200 the constraints corresponding to the special tokens previously defined. */ |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
201 enum |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
202 { |
18667
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
203 NO_CONSTRAINT = 0777, |
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
204 BEGLINE_CONSTRAINT = 0444, |
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
205 ENDLINE_CONSTRAINT = 0700, |
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
206 BEGWORD_CONSTRAINT = 0050, |
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
207 ENDWORD_CONSTRAINT = 0202, |
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
208 LIMWORD_CONSTRAINT = 0252, |
c83459d710c4
dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents:
18666
diff
changeset
|
209 NOTLIMWORD_CONSTRAINT = 0525 |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
210 }; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
211 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
212 /* The regexp is parsed into an array of tokens in postfix form. Some tokens |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
213 are operators and others are terminal symbols. Most (but not all) of these |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
214 codes are returned by the lexical analyzer. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
215 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
216 typedef ptrdiff_t token; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
217 static ptrdiff_t const TOKEN_MAX = PTRDIFF_MAX; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
218 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
219 /* States are indexed by state_num values. These are normally |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
220 nonnegative but -1 is used as a special value. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
221 typedef ptrdiff_t state_num; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
222 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
223 /* Predefined token values. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
224 enum |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
225 { |
39857
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
226 END = -1, /* END is a terminal symbol that matches the |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
227 end of input; any value of END or less in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
228 the parse tree is such a symbol. Accepting |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
229 states of the DFA are those that would have |
39857
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
230 a transition on END. This is -1, not some |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
231 more-negative value, to tweak the speed of |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
232 comparisons to END. */ |
39855
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
233 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
234 /* Ordinary character values are terminal symbols that match themselves. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
235 |
39857
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
236 /* CSET must come last in the following list of special tokens. Otherwise, |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
237 the list order matters only for performance. Related special tokens |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
238 should have nearby values so that code like (t == ANYCHAR || t == MBCSET |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
239 || CSET <= t) can be done with a single machine-level comparison. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
240 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
241 EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
242 the empty string. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
243 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
244 QMARK, /* QMARK is an operator of one argument that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
245 matches zero or one occurrences of its |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
246 argument. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
247 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
248 STAR, /* STAR is an operator of one argument that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
249 matches the Kleene closure (zero or more |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
250 occurrences) of its argument. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
251 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
252 PLUS, /* PLUS is an operator of one argument that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
253 matches the positive closure (one or more |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
254 occurrences) of its argument. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
255 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
256 REPMN, /* REPMN is a lexical token corresponding |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
257 to the {m,n} construct. REPMN never |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
258 appears in the compiled token vector. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
259 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
260 CAT, /* CAT is an operator of two arguments that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
261 matches the concatenation of its |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
262 arguments. CAT is never returned by the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
263 lexical analyzer. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
264 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
265 OR, /* OR is an operator of two arguments that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
266 matches either of its arguments. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
267 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
268 LPAREN, /* LPAREN never appears in the parse tree, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
269 it is only a lexeme. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
270 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
271 RPAREN, /* RPAREN never appears in the parse tree. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
272 |
39857
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
273 WCHAR, /* Only returned by lex. wctok contains |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
274 the wide character representation. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
275 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
276 ANYCHAR, /* ANYCHAR is a terminal symbol that matches |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
277 a valid multibyte (or single byte) character. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
278 It is used only if MB_CUR_MAX > 1. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
279 |
39857
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
280 BEG, /* BEG is an initial symbol that matches the |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
281 beginning of input. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
282 |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
283 BEGLINE, /* BEGLINE is a terminal symbol that matches |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
284 the empty string at the beginning of a |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
285 line. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
286 |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
287 ENDLINE, /* ENDLINE is a terminal symbol that matches |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
288 the empty string at the end of a line. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
289 |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
290 BEGWORD, /* BEGWORD is a terminal symbol that matches |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
291 the empty string at the beginning of a |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
292 word. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
293 |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
294 ENDWORD, /* ENDWORD is a terminal symbol that matches |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
295 the empty string at the end of a word. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
296 |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
297 LIMWORD, /* LIMWORD is a terminal symbol that matches |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
298 the empty string at the beginning or the |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
299 end of a word. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
300 |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
301 NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
302 matches the empty string not at |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
303 the beginning or end of a word. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
304 |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
305 BACKREF, /* BACKREF is generated by \<digit> |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
306 or by any other construct that |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
307 is not completely handled. If the scanner |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
308 detects a transition on backref, it returns |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
309 a kind of "semi-success" indicating that |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
310 the match will have to be verified with |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
311 a backtracking matcher. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
312 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
313 MBCSET, /* MBCSET is similar to CSET, but for |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
314 multibyte characters. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
315 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
316 CSET /* CSET (and any value greater) is a |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
317 terminal symbol that matches any of a |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
318 class of characters. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
319 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
320 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
321 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
322 /* States of the recognizer correspond to sets of positions in the parse |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
323 tree, together with the constraints under which they may be matched. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
324 So a position is encoded as an index into the parse tree together with |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
325 a constraint. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
326 typedef struct |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
327 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
328 size_t index; /* Index into the parse array. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
329 unsigned int constraint; /* Constraint for matching this position. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
330 } position; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
331 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
332 /* Sets of positions are stored as arrays. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
333 typedef struct |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
334 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
335 position *elems; /* Elements of this position set. */ |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
336 ptrdiff_t nelem; /* Number of elements in this set. */ |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
337 ptrdiff_t alloc; /* Number of elements allocated in ELEMS. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
338 } position_set; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
339 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
340 /* A state of the dfa consists of a set of positions, some flags, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
341 and the token value of the lowest-numbered position of the state that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
342 contains an END token. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
343 typedef struct |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
344 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
345 size_t hash; /* Hash of the positions of this state. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
346 position_set elems; /* Positions this state could match. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
347 unsigned char context; /* Context from previous state. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
348 unsigned short constraint; /* Constraint for this state to accept. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
349 token first_end; /* Token value of the first END in elems. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
350 position_set mbps; /* Positions which can match multibyte |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
351 characters or the follows, e.g., period. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
352 Used only if MB_CUR_MAX > 1. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
353 state_num mb_trindex; /* Index of this state in MB_TRANS, or |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
354 negative if the state does not have |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
355 ANYCHAR. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
356 } dfa_state; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
357 |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
358 /* Maximum for any transition table count. This should be at least 3, |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
359 for the initial state setup. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
360 enum { MAX_TRCOUNT = 1024 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
361 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
362 /* A bracket operator. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
363 e.g., [a-c], [[:alpha:]], etc. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
364 struct mb_char_classes |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
365 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
366 ptrdiff_t cset; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
367 bool invert; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
368 wchar_t *chars; /* Normal characters. */ |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
369 ptrdiff_t nchars; |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
370 ptrdiff_t nchars_alloc; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
371 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
372 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
373 struct regex_syntax |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
374 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
375 /* Syntax bits controlling the behavior of the lexical analyzer. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
376 reg_syntax_t syntax_bits; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
377 bool syntax_bits_set; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
378 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
379 /* Flag for case-folding letters into sets. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
380 bool case_fold; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
381 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
382 /* True if ^ and $ match only the start and end of data, and do not match |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
383 end-of-line within data. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
384 bool anchor; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
385 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
386 /* End-of-line byte in data. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
387 unsigned char eolbyte; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
388 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
389 /* Cache of char-context values. */ |
18621
fd76a0964db1
dfa: shorten sbit, success
Paul Eggert <eggert@cs.ucla.edu>
parents:
18620
diff
changeset
|
390 char sbit[NOTCHAR]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
391 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
392 /* If never_trail[B], the byte B cannot be a non-initial byte in a |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
393 multibyte character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
394 bool never_trail[NOTCHAR]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
395 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
396 /* Set of characters considered letters. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
397 charclass letters; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
398 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
399 /* Set of characters that are newline. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
400 charclass newline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
401 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
402 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
403 /* Lexical analyzer. All the dross that deals with the obnoxious |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
404 GNU Regex syntax bits is located here. The poor, suffering |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
405 reader is referred to the GNU Regex documentation for the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
406 meaning of the @#%!@#%^!@ syntax bits. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
407 struct lexer_state |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
408 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
409 char const *ptr; /* Pointer to next input character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
410 size_t left; /* Number of characters remaining. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
411 token lasttok; /* Previous token returned; initially END. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
412 size_t parens; /* Count of outstanding left parens. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
413 int minrep, maxrep; /* Repeat counts for {m,n}. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
414 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
415 /* Wide character representation of the current multibyte character, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
416 or WEOF if there was an encoding error. Used only if |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
417 MB_CUR_MAX > 1. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
418 wint_t wctok; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
419 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
420 /* Length of the multibyte representation of wctok. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
421 int cur_mb_len; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
422 |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
423 /* The most recently analyzed multibyte bracket expression. */ |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
424 struct mb_char_classes brack; |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
425 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
426 /* We're separated from beginning or (, | only by zero-width characters. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
427 bool laststart; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
428 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
429 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
430 /* Recursive descent parser for regular expressions. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
431 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
432 struct parser_state |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
433 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
434 token tok; /* Lookahead token. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
435 size_t depth; /* Current depth of a hypothetical stack |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
436 holding deferred productions. This is |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
437 used to determine the depth that will be |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
438 required of the real stack later on in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
439 dfaanalyze. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
440 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
441 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
442 /* A compiled regular expression. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
443 struct dfa |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
444 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
445 /* Syntax configuration */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
446 struct regex_syntax syntax; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
447 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
448 /* Fields filled by the scanner. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
449 charclass *charclasses; /* Array of character sets for CSET tokens. */ |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
450 ptrdiff_t cindex; /* Index for adding new charclasses. */ |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
451 ptrdiff_t calloc; /* Number of charclasses allocated. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
452 size_t canychar; /* Index of anychar class, or (size_t) -1. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
453 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
454 /* Scanner state */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
455 struct lexer_state lex; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
456 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
457 /* Parser state */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
458 struct parser_state parse; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
459 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
460 /* Fields filled by the parser. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
461 token *tokens; /* Postfix parse array. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
462 size_t tindex; /* Index for adding new tokens. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
463 size_t talloc; /* Number of tokens currently allocated. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
464 size_t depth; /* Depth required of an evaluation stack |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
465 used for depth-first traversal of the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
466 parse tree. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
467 size_t nleaves; /* Number of leaves on the parse tree. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
468 size_t nregexps; /* Count of parallel regexps being built |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
469 with dfaparse. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
470 bool fast; /* The DFA is fast. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
471 token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
472 mbstate_t mbs; /* Multibyte conversion state. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
473 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
474 /* The following are valid only if MB_CUR_MAX > 1. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
475 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
476 /* The value of multibyte_prop[i] is defined by the following rule. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
477 if tokens[i] < NOTCHAR |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
478 bit 0 : tokens[i] is the first byte of a character, including |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
479 single-byte characters. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
480 bit 1 : tokens[i] is the last byte of a character, including |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
481 single-byte characters. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
482 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
483 e.g. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
484 tokens |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
485 = 'single_byte_a', 'multi_byte_A', 'single_byte_b' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
486 = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
487 multibyte_prop |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
488 = 3 , 1 , 0 , 2 , 3 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
489 */ |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
490 char *multibyte_prop; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
491 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
492 /* Fields filled by the superset. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
493 struct dfa *superset; /* Hint of the dfa. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
494 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
495 /* Fields filled by the state builder. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
496 dfa_state *states; /* States of the dfa. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
497 state_num sindex; /* Index for adding new states. */ |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
498 ptrdiff_t salloc; /* Number of states currently allocated. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
499 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
500 /* Fields filled by the parse tree->NFA conversion. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
501 position_set *follows; /* Array of follow sets, indexed by position |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
502 index. The follow of a position is the set |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
503 of positions containing characters that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
504 could conceivably follow a character |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
505 matching the given position in a string |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
506 matching the regexp. Allocated to the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
507 maximum possible position index. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
508 bool searchflag; /* We are supposed to build a searching |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
509 as opposed to an exact matcher. A searching |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
510 matcher finds the first and shortest string |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
511 matching a regexp anywhere in the buffer, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
512 whereas an exact matcher finds the longest |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
513 string matching, but anchored to the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
514 beginning of the buffer. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
515 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
516 /* Fields filled by dfaanalyze. */ |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
517 int *constraints; /* Array of union of accepting constraints |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
518 in the follow of a position. */ |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
519 int *separates; /* Array of contexts on follow of a |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
520 position. */ |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
521 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
522 /* Fields filled by dfaexec. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
523 state_num tralloc; /* Number of transition tables that have |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
524 slots so far, not counting trans[-1] and |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
525 trans[-2]. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
526 int trcount; /* Number of transition tables that have |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
527 been built, other than for initial |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
528 states. */ |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
529 int min_trcount; /* Number of initial states. Equivalently, |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
530 the minimum state number for which trcount |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
531 counts transitions. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
532 state_num **trans; /* Transition tables for states that can |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
533 never accept. If the transitions for a |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
534 state have not yet been computed, or the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
535 state could possibly accept, its entry in |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
536 this table is NULL. This points to two |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
537 past the start of the allocated array, |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
538 and trans[-1] and trans[-2] are always |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
539 NULL. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
540 state_num **fails; /* Transition tables after failing to accept |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
541 on a state that potentially could do so. |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
542 If trans[i] is non-null, fails[i] must |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
543 be null. */ |
18621
fd76a0964db1
dfa: shorten sbit, success
Paul Eggert <eggert@cs.ucla.edu>
parents:
18620
diff
changeset
|
544 char *success; /* Table of acceptance conditions used in |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
545 dfaexec and computed in build_state. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
546 state_num *newlines; /* Transitions on newlines. The entry for a |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
547 newline in any transition table is always |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
548 -1 so we can count lines without wasting |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
549 too many cycles. The transition for a |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
550 newline is stored separately and handled |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
551 as a special case. Newline is also used |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
552 as a sentinel at the end of the buffer. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
553 state_num initstate_notbol; /* Initial state for CTX_LETTER and CTX_NONE |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
554 context in multibyte locales, in which we |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
555 do not distinguish between their contexts, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
556 as word context is not supported there. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
557 position_set mb_follows; /* Follow set added by ANYCHAR on demand. */ |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
558 state_num **mb_trans; /* Transition tables for states with |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
559 ANYCHAR. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
560 state_num mb_trcount; /* Number of transition tables for states with |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
561 ANYCHAR that have actually been built. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
562 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
563 /* Information derived from the locale. This is at the end so that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
564 a quick memset need not clear it specially. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
565 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
566 /* dfaexec implementation. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
567 char *(*dfaexec) (struct dfa *, char const *, char *, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
568 bool, size_t *, bool *); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
569 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
570 /* The locale is simple, like the C locale. These locales can be |
18752
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
571 processed more efficiently, as they are single-byte, their native |
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
572 character set is in collating-sequence order, and they do not |
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
573 have multi-character collating elements. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
574 bool simple_locale; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
575 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
576 /* Other cached information derived from the locale. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
577 struct localeinfo localeinfo; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
578 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
579 |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
580 /* User access to dfa internals. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
581 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
582 /* S could possibly be an accepting state of R. */ |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
583 static bool |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
584 accepting (state_num s, struct dfa const *r) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
585 { |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
586 return r->states[s].constraint != 0; |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
587 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
588 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
589 /* STATE accepts in the specified context. */ |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
590 static bool |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
591 accepts_in_context (int prev, int curr, state_num state, struct dfa const *dfa) |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
592 { |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
593 return succeeds_in_context (dfa->states[state].constraint, prev, curr); |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
594 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
595 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
596 static void regexp (struct dfa *dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
597 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
598 /* Store into *PWC the result of converting the leading bytes of the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
599 multibyte buffer S of length N bytes, using D->localeinfo.sbctowc |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
600 and updating the conversion state in *D. On conversion error, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
601 convert just a single byte, to WEOF. Return the number of bytes |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
602 converted. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
603 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
604 This differs from mbrtowc (PWC, S, N, &D->mbs) as follows: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
605 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
606 * PWC points to wint_t, not to wchar_t. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
607 * The last arg is a dfa *D instead of merely a multibyte conversion |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
608 state D->mbs. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
609 * N must be at least 1. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
610 * S[N - 1] must be a sentinel byte. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
611 * Shift encodings are not supported. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
612 * The return value is always in the range 1..N. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
613 * D->mbs is always valid afterwards. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
614 * *PWC is always set to something. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
615 static size_t |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
616 mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
617 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
618 unsigned char uc = s[0]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
619 wint_t wc = d->localeinfo.sbctowc[uc]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
620 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
621 if (wc == WEOF) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
622 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
623 wchar_t wch; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
624 size_t nbytes = mbrtowc (&wch, s, n, &d->mbs); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
625 if (0 < nbytes && nbytes < (size_t) -2) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
626 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
627 *pwc = wch; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
628 return nbytes; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
629 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
630 memset (&d->mbs, 0, sizeof d->mbs); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
631 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
632 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
633 *pwc = wc; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
634 return 1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
635 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
636 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
637 #ifdef DEBUG |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
638 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
639 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
640 prtok (token t) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
641 { |
39855
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
642 if (t <= END) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
643 fprintf (stderr, "END"); |
39855
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
644 else if (0 <= t && t < NOTCHAR) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
645 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
646 unsigned int ch = t; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
647 fprintf (stderr, "0x%02x", ch); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
648 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
649 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
650 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
651 char const *s; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
652 switch (t) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
653 { |
39855
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
654 case BEG: |
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
655 s = "BEG"; |
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
656 break; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
657 case EMPTY: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
658 s = "EMPTY"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
659 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
660 case BACKREF: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
661 s = "BACKREF"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
662 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
663 case BEGLINE: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
664 s = "BEGLINE"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
665 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
666 case ENDLINE: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
667 s = "ENDLINE"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
668 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
669 case BEGWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
670 s = "BEGWORD"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
671 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
672 case ENDWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
673 s = "ENDWORD"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
674 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
675 case LIMWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
676 s = "LIMWORD"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
677 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
678 case NOTLIMWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
679 s = "NOTLIMWORD"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
680 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
681 case QMARK: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
682 s = "QMARK"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
683 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
684 case STAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
685 s = "STAR"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
686 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
687 case PLUS: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
688 s = "PLUS"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
689 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
690 case CAT: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
691 s = "CAT"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
692 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
693 case OR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
694 s = "OR"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
695 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
696 case LPAREN: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
697 s = "LPAREN"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
698 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
699 case RPAREN: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
700 s = "RPAREN"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
701 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
702 case ANYCHAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
703 s = "ANYCHAR"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
704 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
705 case MBCSET: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
706 s = "MBCSET"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
707 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
708 default: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
709 s = "CSET"; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
710 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
711 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
712 fprintf (stderr, "%s", s); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
713 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
714 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
715 #endif /* DEBUG */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
716 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
717 /* Stuff pertaining to charclasses. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
718 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
719 static bool |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
720 tstbit (unsigned int b, charclass const *c) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
721 { |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
722 return c->w[b / CHARCLASS_WORD_BITS] >> b % CHARCLASS_WORD_BITS & 1; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
723 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
724 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
725 static void |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
726 setbit (unsigned int b, charclass *c) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
727 { |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
728 charclass_word one = 1; |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
729 c->w[b / CHARCLASS_WORD_BITS] |= one << b % CHARCLASS_WORD_BITS; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
730 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
731 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
732 static void |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
733 clrbit (unsigned int b, charclass *c) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
734 { |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
735 charclass_word one = 1; |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
736 c->w[b / CHARCLASS_WORD_BITS] &= ~(one << b % CHARCLASS_WORD_BITS); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
737 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
738 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
739 static void |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
740 zeroset (charclass *s) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
741 { |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
742 memset (s, 0, sizeof *s); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
743 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
744 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
745 static void |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
746 fillset (charclass *s) |
18525
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
747 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
748 for (int i = 0; i < CHARCLASS_WORDS; i++) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
749 s->w[i] = CHARCLASS_WORD_MASK; |
18525
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
750 } |
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
751 |
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
752 static void |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
753 notset (charclass *s) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
754 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
755 for (int i = 0; i < CHARCLASS_WORDS; ++i) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
756 s->w[i] = CHARCLASS_WORD_MASK & ~s->w[i]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
757 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
758 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
759 static bool |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
760 equal (charclass const *s1, charclass const *s2) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
761 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
762 charclass_word w = 0; |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
763 for (int i = 0; i < CHARCLASS_WORDS; i++) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
764 w |= s1->w[i] ^ s2->w[i]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
765 return w == 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
766 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
767 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
768 static bool |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
769 emptyset (charclass const *s) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
770 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
771 charclass_word w = 0; |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
772 for (int i = 0; i < CHARCLASS_WORDS; i++) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
773 w |= s->w[i]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
774 return w == 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
775 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
776 |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
/* Enlarge the array PA, which currently has room for *NITEMS items of
   ITEM_SIZE bytes each, and return the address of the enlarged array
   after storing its new capacity in *NITEMS.  The result has room for
   at least NITEMS_INCR_MIN additional items, and for no more than
   NITEMS_MAX items in total; a NITEMS_MAX of -1 stands for "no limit".

   ITEM_SIZE and NITEMS_INCR_MIN must be positive, and *NITEMS must be
   nonnegative.

   A null PA asks for a brand-new array instead of a reallocation.
   Hence an array A whose old contents need not be kept can be grown
   with { free (A); A = xpalloc (NULL, &AITEMS, ...); }.  */

static void *
xpalloc (void *pa, ptrdiff_t *nitems, ptrdiff_t nitems_incr_min,
         ptrdiff_t nitems_max, ptrdiff_t item_size)
{
  /* Approximate byte size to use for the first, small allocations.
     This is the largest "small" request for the GNU C library
     malloc.  */
  enum { DEFAULT_MXFAST = 64 * sizeof (size_t) / 4 };

  ptrdiff_t old_n = *nitems;
  bool limited = 0 <= nitems_max;

  /* First candidate: grow by about 50%, saturating at PTRDIFF_MAX if
     the addition overflows, and never exceeding NITEMS_MAX.  */
  ptrdiff_t n;
  if (INT_ADD_WRAPV (old_n, old_n >> 1, &n))
    n = PTRDIFF_MAX;
  if (limited && nitems_max < n)
    n = nitems_max;

  /* Pick a replacement byte count if the candidate's byte size is
     unrepresentable (use the largest representable size) or is below
     DEFAULT_MXFAST (use DEFAULT_MXFAST).  Zero means the candidate
     stands unchanged.  */
  ptrdiff_t nbytes;
  ptrdiff_t adjusted_nbytes;
  if (INT_MULTIPLY_WRAPV (n, item_size, &nbytes) || SIZE_MAX < nbytes)
    adjusted_nbytes = MIN (PTRDIFF_MAX, SIZE_MAX);
  else if (nbytes < DEFAULT_MXFAST)
    adjusted_nbytes = DEFAULT_MXFAST;
  else
    adjusted_nbytes = 0;

  if (adjusted_nbytes)
    {
      /* Round the replacement down to a whole number of items.  */
      n = adjusted_nbytes / item_size;
      nbytes = adjusted_nbytes - adjusted_nbytes % item_size;
    }

  if (! pa)
    *nitems = 0;

  /* If the candidate does not add NITEMS_INCR_MIN items, fall back to
     exactly OLD_N + NITEMS_INCR_MIN.  Give up if that overflows,
     exceeds NITEMS_MAX, or has an unrepresentable byte size.  The
     checks run in this order because each one uses the N computed by
     the one before it.  */
  if (n - old_n < nitems_incr_min)
    {
      if (INT_ADD_WRAPV (old_n, nitems_incr_min, &n)
          || (limited && nitems_max < n)
          || INT_MULTIPLY_WRAPV (n, item_size, &nbytes))
        xalloc_die ();
    }

  void *grown = xrealloc (pa, nbytes);
  *nitems = n;
  return grown;
}
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
836 |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
837 /* Ensure that the array addressed by PA holds at least I + 1 items. |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
838 Either return PA, or reallocate the array and return its new address. |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
839 Although PA may be null, the returned value is never null. |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
840 |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
841 The array holds *NITEMS items, where 0 <= I <= *NITEMS; *NITEMS |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
842 is updated on reallocation. If PA is null, *NITEMS must be zero. |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
843 Do not allocate more than NITEMS_MAX items total; -1 means no limit. |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
844 ITEM_SIZE is the size of one item; it must be positive. |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
845 Avoid O(N**2) behavior on arrays growing linearly. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
846 static void * |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
847 maybe_realloc (void *pa, ptrdiff_t i, ptrdiff_t *nitems, |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
848 ptrdiff_t nitems_max, ptrdiff_t item_size) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
849 { |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
850 if (i < *nitems) |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
851 return pa; |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
852 return xpalloc (pa, nitems, 1, nitems_max, item_size); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
853 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
854 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
855 /* In DFA D, find the index of charclass S, or allocate a new one. */ |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
856 static ptrdiff_t |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
857 charclass_index (struct dfa *d, charclass *s) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
858 { |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
859 ptrdiff_t i; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
860 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
861 for (i = 0; i < d->cindex; ++i) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
862 if (equal (s, &d->charclasses[i])) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
863 return i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
864 d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc, |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
865 TOKEN_MAX - CSET, sizeof *d->charclasses); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
866 ++d->cindex; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
867 d->charclasses[i] = *s; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
868 return i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
869 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
870 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
871 static bool |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
872 unibyte_word_constituent (struct dfa const *dfa, unsigned char c) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
873 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
874 return dfa->localeinfo.sbctowc[c] != WEOF && (isalnum (c) || (c) == '_'); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
875 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
876 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
877 static int |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
878 char_context (struct dfa const *dfa, unsigned char c) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
879 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
880 if (c == dfa->syntax.eolbyte && !dfa->syntax.anchor) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
881 return CTX_NEWLINE; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
882 if (unibyte_word_constituent (dfa, c)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
883 return CTX_LETTER; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
884 return CTX_NONE; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
885 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
886 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
887 /* Set a bit in the charclass for the given wchar_t. Do nothing if WC |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
888 is represented by a multi-byte sequence. Even for MB_CUR_MAX == 1, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
889 this may happen when folding case in weird Turkish locales where |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
890 dotless i/dotted I are not included in the chosen character set. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
891 Return whether a bit was set in the charclass. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
892 static bool |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
893 setbit_wc (wint_t wc, charclass *c) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
894 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
895 int b = wctob (wc); |
18619
5ceb90ef64b1
dfa: minor performance tweak
Paul Eggert <eggert@cs.ucla.edu>
parents:
18618
diff
changeset
|
896 if (b < 0) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
897 return false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
898 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
899 setbit (b, c); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
900 return true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
901 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
902 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
903 /* Set a bit for B and its case variants in the charclass C. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
904 MB_CUR_MAX must be 1. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
905 static void |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
906 setbit_case_fold_c (int b, charclass *c) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
907 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
908 int ub = toupper (b); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
909 for (int i = 0; i < NOTCHAR; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
910 if (toupper (i) == ub) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
911 setbit (i, c); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
912 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
913 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
914 /* Return true if the locale is compatible with the C locale. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
915 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
916 static bool |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
917 using_simple_locale (bool multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
918 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
919 /* The native character set is known to be compatible with |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
920 the C locale. The following test isn't perfect, but it's good |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
921 enough in practice, as only ASCII and EBCDIC are in common use |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
922 and this test correctly accepts ASCII and rejects EBCDIC. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
923 enum { native_c_charset = |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
924 ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
925 && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
926 && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
927 && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
928 && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
929 && '<' == 60 && '=' == 61 && '>' == 62 && '?' == 63 && 'A' == 65 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
930 && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
931 && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
932 && '}' == 125 && '~' == 126) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
933 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
934 |
18519 | 935 if (!native_c_charset || multibyte) |
936 return false; | |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
937 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
938 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
939 /* Treat C and POSIX locales as being compatible. Also, treat |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
940 errors as compatible, as these are invariably from stubs. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
941 char const *loc = setlocale (LC_ALL, NULL); |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
942 return !loc || streq (loc, "C") || streq (loc, "POSIX"); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
943 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
944 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
945 |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
946 /* Fetch the next lexical input character from the pattern. There |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
947 must be at least one byte of pattern input. Set DFA->lex.wctok to the |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
948 value of the character or to WEOF depending on whether the input is |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
949 a valid multibyte character (possibly of length 1). Then return |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
950 the next input byte value, except return EOF if the input is a |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
951 multibyte character of length greater than 1. */ |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
952 static int |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
953 fetch_wc (struct dfa *dfa) |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
954 { |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
955 size_t nbytes = mbs_to_wchar (&dfa->lex.wctok, dfa->lex.ptr, dfa->lex.left, |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
956 dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
957 dfa->lex.cur_mb_len = nbytes; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
958 int c = nbytes == 1 ? to_uchar (dfa->lex.ptr[0]) : EOF; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
959 dfa->lex.ptr += nbytes; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
960 dfa->lex.left -= nbytes; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
961 return c; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
962 } |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
963 |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
964 /* If there is no more input, report an error about unbalanced brackets. |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
965 Otherwise, behave as with fetch_wc (DFA). */ |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
966 static int |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
967 bracket_fetch_wc (struct dfa *dfa) |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
968 { |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
969 if (! dfa->lex.left) |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
970 dfaerror (_("unbalanced [")); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
971 return fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
972 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
973 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
974 typedef int predicate (int); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
975 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
976 /* The following list maps the names of the POSIX named character classes |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
977 to predicate functions that determine whether a given character is in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
978 the class. The leading [ has already been eaten by the lexical |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
979 analyzer. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
980 struct dfa_ctype |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
981 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
982 const char *name; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
983 predicate *func; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
984 bool single_byte_only; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
985 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
986 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
987 static const struct dfa_ctype prednames[] = { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
988 {"alpha", isalpha, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
989 {"upper", isupper, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
990 {"lower", islower, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
991 {"digit", isdigit, true}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
992 {"xdigit", isxdigit, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
993 {"space", isspace, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
994 {"punct", ispunct, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
995 {"alnum", isalnum, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
996 {"print", isprint, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
997 {"graph", isgraph, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
998 {"cntrl", iscntrl, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
999 {"blank", isblank, false}, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1000 {NULL, NULL, false} |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1001 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1002 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1003 static const struct dfa_ctype *_GL_ATTRIBUTE_PURE |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1004 find_pred (const char *str) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1005 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1006 for (unsigned int i = 0; prednames[i].name; ++i) |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
1007 if (streq (str, prednames[i].name)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1008 return &prednames[i]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1009 return NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1010 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1011 |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1012 /* Parse a bracket expression, which possibly includes multibyte |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1013 characters. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1014 static token |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1015 parse_bracket_exp (struct dfa *dfa) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1016 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1017 /* This is a bracket expression that dfaexec is known to |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1018 process correctly. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1019 bool known_bracket_exp = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1020 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1021 /* Used to warn about [:space:]. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1022 Bit 0 = first character is a colon. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1023 Bit 1 = last character is a colon. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1024 Bit 2 = includes any other character but a colon. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1025 Bit 3 = includes ranges, char/equiv classes or collation elements. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1026 int colon_warning_state; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1027 |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1028 dfa->lex.brack.nchars = 0; |
18633
42cabb9832cd
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18632
diff
changeset
|
1029 charclass ccl; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1030 zeroset (&ccl); |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1031 int c = bracket_fetch_wc (dfa); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
1032 bool invert = c == '^'; |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
1033 if (invert) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1034 { |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1035 c = bracket_fetch_wc (dfa); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1036 known_bracket_exp = dfa->simple_locale; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1037 } |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1038 wint_t wc = dfa->lex.wctok; |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1039 int c1; |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1040 wint_t wc1; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1041 colon_warning_state = (c == ':'); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1042 do |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1043 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1044 c1 = NOTCHAR; /* Mark c1 as not initialized. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1045 colon_warning_state &= ~2; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1046 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1047 /* Note that if we're looking at some other [:...:] construct, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1048 we just treat it as a bunch of ordinary characters. We can do |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1049 this because we assume regex has checked for syntax errors before |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1050 dfa is ever called. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1051 if (c == '[') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1052 { |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1053 c1 = bracket_fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1054 wc1 = dfa->lex.wctok; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1055 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1056 if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1057 || c1 == '.' || c1 == '=') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1058 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1059 enum { MAX_BRACKET_STRING_LEN = 32 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1060 char str[MAX_BRACKET_STRING_LEN + 1]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1061 size_t len = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1062 for (;;) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1063 { |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1064 c = bracket_fetch_wc (dfa); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1065 if (dfa->lex.left == 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1066 || (c == c1 && dfa->lex.ptr[0] == ']')) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1067 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1068 if (len < MAX_BRACKET_STRING_LEN) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1069 str[len++] = c; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1070 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1071 /* This is in any case an invalid class name. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1072 str[0] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1073 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1074 str[len] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1075 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1076 /* Fetch bracket. */ |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1077 c = bracket_fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1078 wc = dfa->lex.wctok; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1079 if (c1 == ':') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1080 /* Build character class. POSIX allows character |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1081 classes to match multicharacter collating elements, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1082 but the regex code does not support that, so do not |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1083 worry about that possibility. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1084 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1085 char const *class |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
1086 = (dfa->syntax.case_fold && (streq (str, "upper") |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
1087 || streq (str, "lower")) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1088 ? "alpha" : str); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1089 const struct dfa_ctype *pred = find_pred (class); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1090 if (!pred) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1091 dfaerror (_("invalid character class")); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1092 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1093 if (dfa->localeinfo.multibyte && !pred->single_byte_only) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1094 known_bracket_exp = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1095 else |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1096 for (int c2 = 0; c2 < NOTCHAR; ++c2) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1097 if (pred->func (c2)) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1098 setbit (c2, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1099 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1100 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1101 known_bracket_exp = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1102 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1103 colon_warning_state |= 8; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1104 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1105 /* Fetch new lookahead character. */ |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1106 c1 = bracket_fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1107 wc1 = dfa->lex.wctok; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1108 continue; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1109 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1110 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1111 /* We treat '[' as a normal character here. c/c1/wc/wc1 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1112 are already set up. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1113 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1114 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1115 if (c == '\\' |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1116 && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1117 { |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1118 c = bracket_fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1119 wc = dfa->lex.wctok; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1120 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1121 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1122 if (c1 == NOTCHAR) |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1123 { |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1124 c1 = bracket_fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1125 wc1 = dfa->lex.wctok; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1126 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1127 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1128 if (c1 == '-') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1129 /* build range characters. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1130 { |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1131 int c2 = bracket_fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1132 wint_t wc2 = dfa->lex.wctok; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1133 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1134 /* A bracket expression like [a-[.aa.]] matches an unknown set. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1135 Treat it like [-a[.aa.]] while parsing it, and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1136 remember that the set is unknown. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1137 if (c2 == '[' && dfa->lex.ptr[0] == '.') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1138 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1139 known_bracket_exp = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1140 c2 = ']'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1141 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1142 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1143 if (c2 == ']') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1144 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1145 /* In the case [x-], the - is an ordinary hyphen, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1146 which is left in c1, the lookahead character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1147 dfa->lex.ptr -= dfa->lex.cur_mb_len; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1148 dfa->lex.left += dfa->lex.cur_mb_len; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1149 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1150 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1151 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1152 if (c2 == '\\' && (dfa->syntax.syntax_bits |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1153 & RE_BACKSLASH_ESCAPE_IN_LISTS)) |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1154 { |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1155 c2 = bracket_fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1156 wc2 = dfa->lex.wctok; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1157 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1158 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1159 colon_warning_state |= 8; |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1160 c1 = bracket_fetch_wc (dfa); |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1161 wc1 = dfa->lex.wctok; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1162 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1163 /* Treat [x-y] as a range if x != y. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1164 if (wc != wc2 || wc == WEOF) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1165 { |
18752
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1166 if (dfa->simple_locale |
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1167 || (isasciidigit (c) & isasciidigit (c2))) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1168 { |
18752
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1169 for (int ci = c; ci <= c2; ci++) |
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1170 if (dfa->syntax.case_fold && isalpha (ci)) |
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1171 setbit_case_fold_c (ci, &ccl); |
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1172 else |
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1173 setbit (ci, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1174 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1175 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1176 known_bracket_exp = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1177 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1178 continue; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1179 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1180 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1181 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1182 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1183 colon_warning_state |= (c == ':') ? 2 : 4; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1184 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1185 if (!dfa->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1186 { |
18752
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1187 if (dfa->syntax.case_fold && isalpha (c)) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1188 setbit_case_fold_c (c, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1189 else |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1190 setbit (c, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1191 continue; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1192 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1193 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1194 if (wc == WEOF) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1195 known_bracket_exp = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1196 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1197 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1198 wchar_t folded[CASE_FOLDED_BUFSIZE + 1]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1199 unsigned int n = (dfa->syntax.case_fold |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1200 ? case_folded_counterparts (wc, folded + 1) + 1 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1201 : 1); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1202 folded[0] = wc; |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1203 for (unsigned int i = 0; i < n; i++) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1204 if (!setbit_wc (folded[i], &ccl)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1205 { |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1206 dfa->lex.brack.chars |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1207 = maybe_realloc (dfa->lex.brack.chars, dfa->lex.brack.nchars, |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1208 &dfa->lex.brack.nchars_alloc, -1, |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1209 sizeof *dfa->lex.brack.chars); |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1210 dfa->lex.brack.chars[dfa->lex.brack.nchars++] = folded[i]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1211 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1212 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1213 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1214 while ((wc = wc1, (c = c1) != ']')); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1215 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1216 if (colon_warning_state == 7) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1217 dfawarn (_("character class syntax is [[:space:]], not [:space:]")); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1218 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1219 if (! known_bracket_exp) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1220 return BACKREF; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1221 |
18752
82d233900292
dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents:
18679
diff
changeset
|
1222 if (dfa->localeinfo.multibyte && (invert || dfa->lex.brack.nchars != 0)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1223 { |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1224 dfa->lex.brack.invert = invert; |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1225 dfa->lex.brack.cset = emptyset (&ccl) ? -1 : charclass_index (dfa, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1226 return MBCSET; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1227 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1228 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1229 if (invert) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1230 { |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1231 notset (&ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1232 if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1233 clrbit ('\n', &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1234 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1235 |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1236 return CSET + charclass_index (dfa, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1237 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1238 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
/* A saved lexer input position.  push_lex_state fills one of these in
   from the dfa's lex.ptr and lex.left before redirecting the lexer to
   another string, and pop_lex_state copies the two fields back.  */
struct lexptr
{
  char const *ptr;      /* Saved copy of dfa->lex.ptr.  */
  size_t left;          /* Saved copy of dfa->lex.left.  */
};
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1244 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1245 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1246 push_lex_state (struct dfa *dfa, struct lexptr *ls, char const *s) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1247 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1248 ls->ptr = dfa->lex.ptr; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1249 ls->left = dfa->lex.left; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1250 dfa->lex.ptr = s; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1251 dfa->lex.left = strlen (s); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1252 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1253 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1254 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1255 pop_lex_state (struct dfa *dfa, struct lexptr const *ls) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1256 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1257 dfa->lex.ptr = ls->ptr; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1258 dfa->lex.left = ls->left; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1259 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1260 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1261 static token |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1262 lex (struct dfa *dfa) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1263 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1264 bool backslash = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1265 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1266 /* Basic plan: We fetch a character. If it's a backslash, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1267 we set the backslash flag and go through the loop again. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1268 On the plus side, this avoids having a duplicate of the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1269 main switch inside the backslash case. On the minus side, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1270 it means that just about every case begins with |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1271 "if (backslash) ...". */ |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1272 for (int i = 0; i < 2; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1273 { |
18634
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1274 if (! dfa->lex.left) |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1275 return dfa->lex.lasttok = END; |
a2fc5a686baf
dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents:
18633
diff
changeset
|
1276 int c = fetch_wc (dfa); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1277 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1278 switch (c) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1279 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1280 case '\\': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1281 if (backslash) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1282 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1283 if (dfa->lex.left == 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1284 dfaerror (_("unfinished \\ escape")); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1285 backslash = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1286 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1287 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1288 case '^': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1289 if (backslash) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1290 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1291 if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1292 || dfa->lex.lasttok == END || dfa->lex.lasttok == LPAREN |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1293 || dfa->lex.lasttok == OR) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1294 return dfa->lex.lasttok = BEGLINE; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1295 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1296 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1297 case '$': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1298 if (backslash) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1299 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1300 if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1301 || dfa->lex.left == 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1302 || ((dfa->lex.left |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1303 > !(dfa->syntax.syntax_bits & RE_NO_BK_PARENS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1304 && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_PARENS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1305 & (dfa->lex.ptr[0] == '\\')] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1306 == ')')) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1307 || ((dfa->lex.left |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1308 > !(dfa->syntax.syntax_bits & RE_NO_BK_VBAR)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1309 && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_VBAR) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1310 & (dfa->lex.ptr[0] == '\\')] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1311 == '|')) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1312 || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1313 && dfa->lex.left > 0 && dfa->lex.ptr[0] == '\n')) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1314 return dfa->lex.lasttok = ENDLINE; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1315 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1316 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1317 case '1': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1318 case '2': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1319 case '3': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1320 case '4': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1321 case '5': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1322 case '6': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1323 case '7': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1324 case '8': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1325 case '9': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1326 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1327 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1328 dfa->lex.laststart = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1329 return dfa->lex.lasttok = BACKREF; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1330 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1331 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1332 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1333 case '`': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1334 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1335 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1336 /* FIXME: should be beginning of string */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1337 return dfa->lex.lasttok = BEGLINE; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1338 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1339 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1340 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1341 case '\'': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1342 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1343 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1344 /* FIXME: should be end of string */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1345 return dfa->lex.lasttok = ENDLINE; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1346 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1347 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1348 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1349 case '<': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1350 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1351 return dfa->lex.lasttok = BEGWORD; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1352 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1353 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1354 case '>': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1355 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1356 return dfa->lex.lasttok = ENDWORD; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1357 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1358 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1359 case 'b': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1360 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1361 return dfa->lex.lasttok = LIMWORD; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1362 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1363 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1364 case 'B': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1365 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1366 return dfa->lex.lasttok = NOTLIMWORD; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1367 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1368 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1369 case '?': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1370 if (dfa->syntax.syntax_bits & RE_LIMITED_OPS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1371 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1372 if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1373 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1374 if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1375 && dfa->lex.laststart) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1376 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1377 return dfa->lex.lasttok = QMARK; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1378 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1379 case '*': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1380 if (backslash) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1381 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1382 if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1383 && dfa->lex.laststart) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1384 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1385 return dfa->lex.lasttok = STAR; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1386 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1387 case '+': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1388 if (dfa->syntax.syntax_bits & RE_LIMITED_OPS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1389 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1390 if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1391 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1392 if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1393 && dfa->lex.laststart) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1394 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1395 return dfa->lex.lasttok = PLUS; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1396 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1397 case '{': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1398 if (!(dfa->syntax.syntax_bits & RE_INTERVALS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1399 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1400 if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1401 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1402 if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1403 && dfa->lex.laststart) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1404 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1405 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1406 /* Cases: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1407 {M} - exact count |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1408 {M,} - minimum count, maximum is infinity |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1409 {,N} - 0 through N |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1410 {,} - 0 to infinity (same as '*') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1411 {M,N} - M through N */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1412 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1413 char const *p = dfa->lex.ptr; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1414 char const *lim = p + dfa->lex.left; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1415 dfa->lex.minrep = dfa->lex.maxrep = -1; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
1416 for (; p != lim && isasciidigit (*p); p++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1417 dfa->lex.minrep = (dfa->lex.minrep < 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1418 ? *p - '0' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1419 : MIN (RE_DUP_MAX + 1, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1420 dfa->lex.minrep * 10 + *p - '0')); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1421 if (p != lim) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1422 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1423 if (*p != ',') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1424 dfa->lex.maxrep = dfa->lex.minrep; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1425 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1426 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1427 if (dfa->lex.minrep < 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1428 dfa->lex.minrep = 0; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
1429 while (++p != lim && isasciidigit (*p)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1430 dfa->lex.maxrep |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1431 = (dfa->lex.maxrep < 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1432 ? *p - '0' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1433 : MIN (RE_DUP_MAX + 1, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1434 dfa->lex.maxrep * 10 + *p - '0')); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1435 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1436 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1437 if (! ((! backslash || (p != lim && *p++ == '\\')) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1438 && p != lim && *p++ == '}' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1439 && 0 <= dfa->lex.minrep |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1440 && (dfa->lex.maxrep < 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1441 || dfa->lex.minrep <= dfa->lex.maxrep))) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1442 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1443 if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1444 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1445 dfaerror (_("invalid content of \\{\\}")); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1446 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1447 if (RE_DUP_MAX < dfa->lex.maxrep) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1448 dfaerror (_("regular expression too big")); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1449 dfa->lex.ptr = p; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1450 dfa->lex.left = lim - p; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1451 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1452 dfa->lex.laststart = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1453 return dfa->lex.lasttok = REPMN; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1454 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1455 case '|': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1456 if (dfa->syntax.syntax_bits & RE_LIMITED_OPS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1457 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1458 if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1459 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1460 dfa->lex.laststart = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1461 return dfa->lex.lasttok = OR; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1462 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1463 case '\n': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1464 if (dfa->syntax.syntax_bits & RE_LIMITED_OPS |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1465 || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1466 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1467 dfa->lex.laststart = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1468 return dfa->lex.lasttok = OR; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1469 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1470 case '(': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1471 if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1472 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1473 dfa->lex.parens++; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1474 dfa->lex.laststart = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1475 return dfa->lex.lasttok = LPAREN; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1476 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1477 case ')': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1478 if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1479 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1480 if (dfa->lex.parens == 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1481 && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1482 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1483 dfa->lex.parens--; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1484 dfa->lex.laststart = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1485 return dfa->lex.lasttok = RPAREN; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1486 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1487 case '.': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1488 if (backslash) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1489 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1490 if (dfa->canychar == (size_t) -1) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1491 { |
18633
42cabb9832cd
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18632
diff
changeset
|
1492 charclass ccl; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1493 fillset (&ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1494 if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE)) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1495 clrbit ('\n', &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1496 if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1497 clrbit ('\0', &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1498 if (dfa->localeinfo.multibyte) |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1499 for (int c2 = 0; c2 < NOTCHAR; c2++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1500 if (dfa->localeinfo.sbctowc[c2] == WEOF) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1501 clrbit (c2, &ccl); |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1502 dfa->canychar = charclass_index (dfa, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1503 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1504 dfa->lex.laststart = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1505 return dfa->lex.lasttok = (dfa->localeinfo.multibyte |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1506 ? ANYCHAR |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1507 : CSET + dfa->canychar); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1508 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1509 case 's': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1510 case 'S': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1511 if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1512 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1513 if (!dfa->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1514 { |
18633
42cabb9832cd
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18632
diff
changeset
|
1515 charclass ccl; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1516 zeroset (&ccl); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1517 for (int c2 = 0; c2 < NOTCHAR; ++c2) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1518 if (isspace (c2)) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1519 setbit (c2, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1520 if (c == 'S') |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1521 notset (&ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1522 dfa->lex.laststart = false; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1523 return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1524 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1525 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1526 /* FIXME: see if optimizing this, as is done with ANYCHAR and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1527 add_utf8_anychar, makes sense. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1528 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1529 /* \s and \S are documented to be equivalent to [[:space:]] and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1530 [^[:space:]] respectively, so tell the lexer to process those |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1531 strings, each minus its "already processed" '['. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1532 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1533 struct lexptr ls; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1534 push_lex_state (dfa, &ls, &"^[:space:]]"[c == 's']); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1535 dfa->lex.lasttok = parse_bracket_exp (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1536 pop_lex_state (dfa, &ls); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1537 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1538 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1539 dfa->lex.laststart = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1540 return dfa->lex.lasttok; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1541 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1542 case 'w': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1543 case 'W': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1544 if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1545 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1546 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1547 if (!dfa->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1548 { |
18633
42cabb9832cd
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18632
diff
changeset
|
1549 charclass ccl; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1550 zeroset (&ccl); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1551 for (int c2 = 0; c2 < NOTCHAR; ++c2) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1552 if (dfa->syntax.sbit[c2] == CTX_LETTER) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1553 setbit (c2, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1554 if (c == 'W') |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1555 notset (&ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1556 dfa->lex.laststart = false; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1557 return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1558 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1559 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1560 /* FIXME: see if optimizing this, as is done with ANYCHAR and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1561 add_utf8_anychar, makes sense. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1562 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1563 /* \w and \W are documented to be equivalent to [_[:alnum:]] and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1564 [^_[:alnum:]] respectively, so tell the lexer to process those |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1565 strings, each minus its "already processed" '['. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1566 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1567 struct lexptr ls; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1568 push_lex_state (dfa, &ls, &"^_[:alnum:]]"[c == 'w']); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1569 dfa->lex.lasttok = parse_bracket_exp (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1570 pop_lex_state (dfa, &ls); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1571 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1572 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1573 dfa->lex.laststart = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1574 return dfa->lex.lasttok; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1575 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1576 case '[': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1577 if (backslash) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1578 goto normal_char; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1579 dfa->lex.laststart = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1580 return dfa->lex.lasttok = parse_bracket_exp (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1581 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1582 default: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1583 normal_char: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1584 dfa->lex.laststart = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1585 /* For multibyte character sets, folding is done in atom. Always |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1586 return WCHAR. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1587 if (dfa->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1588 return dfa->lex.lasttok = WCHAR; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1589 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1590 if (dfa->syntax.case_fold && isalpha (c)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1591 { |
18633
42cabb9832cd
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18632
diff
changeset
|
1592 charclass ccl; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1593 zeroset (&ccl); |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1594 setbit_case_fold_c (c, &ccl); |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1595 return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1596 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1597 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1598 return dfa->lex.lasttok = c; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1599 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1600 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1601 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1602 /* The above loop should consume at most a backslash |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1603 and some other character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1604 abort (); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1605 return END; /* keeps pedantic compilers happy. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1606 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1607 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1608 static void |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1609 addtok_mb (struct dfa *dfa, token t, char mbprop) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1610 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1611 if (dfa->talloc == dfa->tindex) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1612 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1613 dfa->tokens = x2nrealloc (dfa->tokens, &dfa->talloc, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1614 sizeof *dfa->tokens); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1615 if (dfa->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1616 dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1617 sizeof *dfa->multibyte_prop); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1618 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1619 if (dfa->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1620 dfa->multibyte_prop[dfa->tindex] = mbprop; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1621 dfa->tokens[dfa->tindex++] = t; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1622 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1623 switch (t) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1624 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1625 case QMARK: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1626 case STAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1627 case PLUS: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1628 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1629 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1630 case CAT: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1631 case OR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1632 dfa->parse.depth--; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1633 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1634 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1635 case BACKREF: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1636 dfa->fast = false; |
18914
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
1637 FALLTHROUGH; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1638 default: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1639 dfa->nleaves++; |
18914
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
1640 FALLTHROUGH; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1641 case EMPTY: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1642 dfa->parse.depth++; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1643 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1644 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1645 if (dfa->parse.depth > dfa->depth) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1646 dfa->depth = dfa->parse.depth; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1647 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1648 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1649 static void addtok_wc (struct dfa *dfa, wint_t wc); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1650 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1651 /* Add the given token to the parse tree, maintaining the depth count and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1652 updating the maximum depth if necessary. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1653 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1654 addtok (struct dfa *dfa, token t) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1655 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1656 if (dfa->localeinfo.multibyte && t == MBCSET) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1657 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1658 bool need_or = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1659 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1660 /* Extract wide characters into alternations for better performance. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1661 This does not require UTF-8. */ |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
1662 for (ptrdiff_t i = 0; i < dfa->lex.brack.nchars; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1663 { |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1664 addtok_wc (dfa, dfa->lex.brack.chars[i]); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1665 if (need_or) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1666 addtok (dfa, OR); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1667 need_or = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1668 } |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1669 dfa->lex.brack.nchars = 0; |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1670 |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1671 /* Wide characters have been handled above, so it is possible |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1672 that the set is empty now. Do nothing in that case. */ |
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1673 if (dfa->lex.brack.cset != -1) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1674 { |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
1675 addtok (dfa, CSET + dfa->lex.brack.cset); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1676 if (need_or) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1677 addtok (dfa, OR); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1678 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1679 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1680 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1681 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1682 addtok_mb (dfa, t, 3); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1683 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1684 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1685 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1686 /* We treat a multibyte character as a single atom, so that DFA |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1687 can treat a multibyte character as a single expression. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1688 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1689 e.g., we construct the following tree from "<mb1><mb2>". |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1690 <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1691 <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT> */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1692 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1693 addtok_wc (struct dfa *dfa, wint_t wc) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1694 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1695 unsigned char buf[MB_LEN_MAX]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1696 mbstate_t s = { 0 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1697 size_t stored_bytes = wcrtomb ((char *) buf, wc, &s); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1698 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1699 if (stored_bytes != (size_t) -1) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1700 dfa->lex.cur_mb_len = stored_bytes; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1701 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1702 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1703 /* This is merely stop-gap. buf[0] is undefined, yet skipping |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1704 the addtok_mb call altogether can corrupt the heap. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1705 dfa->lex.cur_mb_len = 1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1706 buf[0] = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1707 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1708 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1709 addtok_mb (dfa, buf[0], dfa->lex.cur_mb_len == 1 ? 3 : 1); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1710 for (int i = 1; i < dfa->lex.cur_mb_len; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1711 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1712 addtok_mb (dfa, buf[i], i == dfa->lex.cur_mb_len - 1 ? 2 : 0); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1713 addtok (dfa, CAT); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1714 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1715 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1716 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1717 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1718 add_utf8_anychar (struct dfa *dfa) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1719 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1720 static charclass const utf8_classes[5] = { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1721 /* 80-bf: non-leading bytes. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1722 CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0xffffffff, 0, 0), |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1723 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1724 /* 00-7f: 1-byte sequence. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1725 CHARCLASS_INIT (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 0), |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1726 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1727 /* c2-df: 2-byte sequence. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1728 CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0xfffffffc, 0), |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1729 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1730 /* e0-ef: 3-byte sequence. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1731 CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xffff), |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1732 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1733 /* f0-f7: 4-byte sequence. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1734 CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xff0000) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1735 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1736 const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1737 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1738 /* Define the five character classes that are needed below. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1739 if (dfa->utf8_anychar_classes[0] == 0) |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1740 for (unsigned int i = 0; i < n; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1741 { |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1742 charclass c = utf8_classes[i]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1743 if (i == 1) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1744 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1745 if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE)) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1746 clrbit ('\n', &c); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1747 if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1748 clrbit ('\0', &c); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1749 } |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
1750 dfa->utf8_anychar_classes[i] = CSET + charclass_index (dfa, &c); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1751 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1752 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1753 /* A valid UTF-8 character is |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1754 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1755 ([0x00-0x7f] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1756 |[0xc2-0xdf][0x80-0xbf] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1757 |[0xe0-0xef][0x80-0xbf][0x80-0xbf] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1758 |[0xf0-0xf7][0x80-0xbf][0x80-0xbf][0x80-0xbf]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1759 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1760 which I'll write more concisely "B|CA|DAA|EAAA" (A is the continuation class [0x80-0xbf], B is [0x00-0x7f]). Factor the [0x80-0xbf] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1761 and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1762 Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */ |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1763 unsigned int i; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1764 for (i = 1; i < n; i++) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1765 addtok (dfa, dfa->utf8_anychar_classes[i]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1766 while (--i > 1) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1767 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1768 addtok (dfa, dfa->utf8_anychar_classes[0]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1769 addtok (dfa, CAT); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1770 addtok (dfa, OR); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1771 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1772 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1773 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1774 /* The grammar understood by the parser is as follows. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1775 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1776 regexp: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1777 regexp OR branch |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1778 branch |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1779 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1780 branch: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1781 branch closure |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1782 closure |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1783 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1784 closure: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1785 closure QMARK |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1786 closure STAR |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1787 closure PLUS |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1788 closure REPMN |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1789 atom |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1790 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1791 atom: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1792 <normal character> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1793 <multibyte character> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1794 ANYCHAR |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1795 MBCSET |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1796 CSET |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1797 BACKREF |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1798 BEGLINE |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1799 ENDLINE |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1800 BEGWORD |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1801 ENDWORD |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1802 LIMWORD |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1803 NOTLIMWORD |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1804 LPAREN regexp RPAREN |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1805 <empty> |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1806 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1807 The parser builds a parse tree in postfix form in an array of tokens. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1808 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1809 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1810 atom (struct dfa *dfa) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1811 { |
39857
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1812 if ((0 <= dfa->parse.tok && dfa->parse.tok < NOTCHAR) |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1813 || dfa->parse.tok >= CSET |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1814 || dfa->parse.tok == BEG || dfa->parse.tok == BACKREF |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1815 || dfa->parse.tok == BEGLINE || dfa->parse.tok == ENDLINE |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1816 || dfa->parse.tok == BEGWORD || dfa->parse.tok == ENDWORD |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1817 || dfa->parse.tok == LIMWORD || dfa->parse.tok == NOTLIMWORD |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1818 || dfa->parse.tok == ANYCHAR || dfa->parse.tok == MBCSET) |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1819 { |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1820 if (dfa->parse.tok == ANYCHAR && dfa->localeinfo.using_utf8) |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1821 { |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1822 /* For UTF-8 expand the period to a series of CSETs that define a |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1823 valid UTF-8 character. This avoids using the slow multibyte |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1824 path. I'm pretty sure it would be both profitable and correct to |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1825 do it for any encoding; however, the optimization must be done |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1826 manually as it is done above in add_utf8_anychar. So, let's |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1827 start with UTF-8: it is the most used, and the structure of the |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1828 encoding makes the correctness more obvious. */ |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1829 add_utf8_anychar (dfa); |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1830 } |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1831 else |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1832 addtok (dfa, dfa->parse.tok); |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1833 dfa->parse.tok = lex (dfa); |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1834 } |
b0bc3272b80e
dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents:
39856
diff
changeset
|
1835 else if (dfa->parse.tok == WCHAR) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1836 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1837 if (dfa->lex.wctok == WEOF) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1838 addtok (dfa, BACKREF); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1839 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1840 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1841 addtok_wc (dfa, dfa->lex.wctok); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1842 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1843 if (dfa->syntax.case_fold) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1844 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1845 wchar_t folded[CASE_FOLDED_BUFSIZE]; |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1846 unsigned int n = case_folded_counterparts (dfa->lex.wctok, |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1847 folded); |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1848 for (unsigned int i = 0; i < n; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1849 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1850 addtok_wc (dfa, folded[i]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1851 addtok (dfa, OR); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1852 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1853 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1854 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1855 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1856 dfa->parse.tok = lex (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1857 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1858 else if (dfa->parse.tok == LPAREN) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1859 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1860 dfa->parse.tok = lex (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1861 regexp (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1862 if (dfa->parse.tok != RPAREN) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1863 dfaerror (_("unbalanced (")); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1864 dfa->parse.tok = lex (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1865 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1866 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1867 addtok (dfa, EMPTY); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1868 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1869 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1870 /* Return the number of tokens in the given subexpression. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1871 static size_t _GL_ATTRIBUTE_PURE |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1872 nsubtoks (struct dfa const *dfa, size_t tindex) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1873 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1874 switch (dfa->tokens[tindex - 1]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1875 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1876 default: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1877 return 1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1878 case QMARK: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1879 case STAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1880 case PLUS: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1881 return 1 + nsubtoks (dfa, tindex - 1); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1882 case CAT: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1883 case OR: |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
1884 { |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
1885 size_t ntoks1 = nsubtoks (dfa, tindex - 1); |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
1886 return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1); |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
1887 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1888 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1889 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1890 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1891 /* Copy the given subexpression to the top of the tree. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1892 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1893 copytoks (struct dfa *dfa, size_t tindex, size_t ntokens) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1894 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1895 if (dfa->localeinfo.multibyte) |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1896 for (size_t i = 0; i < ntokens; ++i) |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1897 addtok_mb (dfa, dfa->tokens[tindex + i], |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1898 dfa->multibyte_prop[tindex + i]); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1899 else |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1900 for (size_t i = 0; i < ntokens; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1901 addtok_mb (dfa, dfa->tokens[tindex + i], 3); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1902 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1903 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1904 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1905 closure (struct dfa *dfa) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1906 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1907 atom (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1908 while (dfa->parse.tok == QMARK || dfa->parse.tok == STAR |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1909 || dfa->parse.tok == PLUS || dfa->parse.tok == REPMN) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1910 if (dfa->parse.tok == REPMN && (dfa->lex.minrep || dfa->lex.maxrep)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1911 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1912 size_t ntokens = nsubtoks (dfa, dfa->tindex); |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1913 size_t tindex = dfa->tindex - ntokens; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1914 if (dfa->lex.maxrep < 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1915 addtok (dfa, PLUS); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1916 if (dfa->lex.minrep == 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1917 addtok (dfa, QMARK); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
1918 int i; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1919 for (i = 1; i < dfa->lex.minrep; i++) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1920 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1921 copytoks (dfa, tindex, ntokens); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1922 addtok (dfa, CAT); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1923 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1924 for (; i < dfa->lex.maxrep; i++) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1925 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1926 copytoks (dfa, tindex, ntokens); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1927 addtok (dfa, QMARK); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1928 addtok (dfa, CAT); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1929 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1930 dfa->parse.tok = lex (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1931 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1932 else if (dfa->parse.tok == REPMN) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1933 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1934 dfa->tindex -= nsubtoks (dfa, dfa->tindex); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1935 dfa->parse.tok = lex (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1936 closure (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1937 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1938 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1939 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1940 addtok (dfa, dfa->parse.tok); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1941 dfa->parse.tok = lex (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1942 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1943 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1944 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1945 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1946 branch (struct dfa* dfa) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1947 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1948 closure (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1949 while (dfa->parse.tok != RPAREN && dfa->parse.tok != OR |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1950 && dfa->parse.tok >= 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1951 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1952 closure (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1953 addtok (dfa, CAT); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1954 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1955 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1956 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1957 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1958 regexp (struct dfa *dfa) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1959 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1960 branch (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1961 while (dfa->parse.tok == OR) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1962 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1963 dfa->parse.tok = lex (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1964 branch (dfa); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1965 addtok (dfa, OR); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1966 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1967 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1968 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1969 /* Main entry point for the parser. S is a string to be parsed, len is the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1970 length of the string, so s can include NUL characters. D is a pointer to |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1971 the struct dfa to parse into. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1972 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1973 dfaparse (char const *s, size_t len, struct dfa *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1974 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1975 d->lex.ptr = s; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1976 d->lex.left = len; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1977 d->lex.lasttok = END; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1978 d->lex.laststart = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1979 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1980 if (!d->syntax.syntax_bits_set) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1981 dfaerror (_("no syntax specified")); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1982 |
39855
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
1983 if (!d->nregexps) |
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
1984 addtok (d, BEG); |
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
1985 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1986 d->parse.tok = lex (d); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1987 d->parse.depth = d->depth; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1988 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1989 regexp (d); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1990 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1991 if (d->parse.tok != END) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1992 dfaerror (_("unbalanced )")); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1993 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1994 addtok (d, END - d->nregexps); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1995 addtok (d, CAT); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1996 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1997 if (d->nregexps) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1998 addtok (d, OR); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
1999 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2000 ++d->nregexps; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2001 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2002 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2003 /* Some primitives for operating on sets of positions. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2004 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2005 /* Copy one set to another. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2006 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2007 copy (position_set const *src, position_set *dst) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2008 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2009 if (dst->alloc < src->nelem) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2010 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2011 free (dst->elems); |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2012 dst->elems = xpalloc (NULL, &dst->alloc, src->nelem - dst->alloc, -1, |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2013 sizeof *dst->elems); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2014 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2015 dst->nelem = src->nelem; |
18673
8321b350b6d7
dfa: port to gcc -fsanitize=undefined
Paul Eggert <eggert@cs.ucla.edu>
parents:
18668
diff
changeset
|
2016 if (src->nelem != 0) |
8321b350b6d7
dfa: port to gcc -fsanitize=undefined
Paul Eggert <eggert@cs.ucla.edu>
parents:
18668
diff
changeset
|
2017 memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2018 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2019 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2020 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2021 alloc_position_set (position_set *s, size_t size) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2022 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2023 s->elems = xnmalloc (size, sizeof *s->elems); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2024 s->alloc = size; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2025 s->nelem = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2026 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2027 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2028 /* Insert position P in set S. S is maintained in sorted order on |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2029 decreasing index. If there is already an entry in S with P.index |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2030 then merge (logically-OR) P's constraints into the one in S. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2031 S->elems must point to an array large enough to hold the resulting set. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2032 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2033 insert (position p, position_set *s) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2034 { |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2035 ptrdiff_t count = s->nelem; |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2036 ptrdiff_t lo = 0, hi = count; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2037 while (lo < hi) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2038 { |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2039 ptrdiff_t mid = (lo + hi) >> 1; |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2040 if (s->elems[mid].index < p.index) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2041 lo = mid + 1; |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2042 else if (s->elems[mid].index == p.index) |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2043 { |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2044 s->elems[mid].constraint |= p.constraint; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2045 return; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2046 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2047 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2048 hi = mid; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2049 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2050 |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2051 s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2052 for (ptrdiff_t i = count; i > lo; i--) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2053 s->elems[i] = s->elems[i - 1]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2054 s->elems[lo] = p; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2055 ++s->nelem; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2056 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2057 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2058 static void |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2059 append (position p, position_set *s) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2060 { |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2061 ptrdiff_t count = s->nelem; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2062 s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2063 s->elems[s->nelem++] = p; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2064 } |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2065 |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2066 /* Merge S1 and S2 (with the additional constraint C2) into M. The |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2067 result is as if the positions of S1, and of S2 with the additional |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2068 constraint C2, were inserted into an initially empty set. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2069 static void |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2070 merge_constrained (position_set const *s1, position_set const *s2, |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2071 unsigned int c2, position_set *m) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2072 { |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2073 ptrdiff_t i = 0, j = 0; |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2074 |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2075 if (m->alloc - s1->nelem < s2->nelem) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2076 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2077 free (m->elems); |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2078 m->alloc = s1->nelem; |
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2079 m->elems = xpalloc (NULL, &m->alloc, s2->nelem, -1, sizeof *m->elems); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2080 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2081 m->nelem = 0; |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2082 while (i < s1->nelem || j < s2->nelem) |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2083 if (! (j < s2->nelem) |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2084 || (i < s1->nelem && s1->elems[i].index <= s2->elems[j].index)) |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2085 { |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2086 unsigned int c = ((i < s1->nelem && j < s2->nelem |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2087 && s1->elems[i].index == s2->elems[j].index) |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2088 ? s2->elems[j++].constraint & c2 |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2089 : 0); |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2090 m->elems[m->nelem].index = s1->elems[i].index; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2091 m->elems[m->nelem++].constraint = s1->elems[i++].constraint | c; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2092 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2093 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2094 { |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2095 if (s2->elems[j].constraint & c2) |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2096 { |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2097 m->elems[m->nelem].index = s2->elems[j].index; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2098 m->elems[m->nelem++].constraint = s2->elems[j].constraint & c2; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2099 } |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2100 j++; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2101 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2102 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2103 |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2104 /* Merge two sets of positions into a third. The result is exactly as if |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2105 the positions of both sets were inserted into an initially empty set. */ |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2106 static void |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2107 merge (position_set const *s1, position_set const *s2, position_set *m) |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2108 { |
18651 | 2109 merge_constrained (s1, s2, -1, m); |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2110 } |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2111 |
39862
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2112 static void |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2113 merge2 (position_set *dst, position_set const *src, position_set *m) |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2114 { |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2115 if (src->nelem < 4) |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2116 { |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2117 for (ptrdiff_t i = 0; i < src->nelem; ++i) |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2118 insert (src->elems[i], dst); |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2119 } |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2120 else |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2121 { |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2122 merge (src, dst, m); |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2123 copy (m, dst); |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2124 } |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2125 } |
f61cd4b41f21
dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39861
diff
changeset
|
2126 |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2127 /* Delete a position from a set. Return the nonzero constraint of the |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2128 deleted position, or zero if there was no such position. */ |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2129 static unsigned int |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2130 delete (size_t del, position_set *s) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2131 { |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2132 size_t count = s->nelem; |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2133 size_t lo = 0, hi = count; |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2134 while (lo < hi) |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2135 { |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2136 size_t mid = (lo + hi) >> 1; |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2137 if (s->elems[mid].index < del) |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2138 lo = mid + 1; |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2139 else if (s->elems[mid].index == del) |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2140 { |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2141 unsigned int c = s->elems[mid].constraint; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2142 size_t i; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2143 for (i = mid; i + 1 < count; i++) |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2144 s->elems[i] = s->elems[i + 1]; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2145 s->nelem = i; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2146 return c; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2147 } |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2148 else |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2149 hi = mid; |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2150 } |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2151 return 0; |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2152 } |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2153 |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2154 /* Replace a position with the followed set. */ |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2155 static void |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2156 replace (position_set *dst, size_t del, position_set *add, |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2157 unsigned int constraint, position_set *tmp) |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2158 { |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2159 unsigned int c = delete (del, dst) & constraint; |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2160 |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2161 if (c) |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2162 { |
18608
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2163 copy (dst, tmp); |
4e21be41ec70
dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
18607
diff
changeset
|
2164 merge_constrained (tmp, add, c, dst); |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2165 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2166 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2167 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2168 /* Find the index of the state corresponding to the given position set with |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2169 the given preceding context, or create a new state if there is no such |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2170 state. Context tells whether we got here on a newline or letter. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2171 static state_num |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2172 state_index (struct dfa *d, position_set const *s, int context) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2173 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2174 size_t hash = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2175 int constraint = 0; |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2176 state_num i; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2177 token first_end = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2178 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2179 for (i = 0; i < s->nelem; ++i) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2180 hash ^= s->elems[i].index + s->elems[i].constraint; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2181 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2182 /* Try to find a state that exactly matches the proposed one. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2183 for (i = 0; i < d->sindex; ++i) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2184 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2185 if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2186 || context != d->states[i].context) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2187 continue; |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2188 state_num j; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2189 for (j = 0; j < s->nelem; ++j) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2190 if (s->elems[j].constraint != d->states[i].elems.elems[j].constraint |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2191 || s->elems[j].index != d->states[i].elems.elems[j].index) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2192 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2193 if (j == s->nelem) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2194 return i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2195 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2196 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2197 #ifdef DEBUG |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2198 fprintf (stderr, "new state %zd\n nextpos:", i); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2199 for (state_num j = 0; j < s->nelem; j++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2200 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2201 fprintf (stderr, " %zu:", s->elems[j].index); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2202 prtok (d->tokens[s->elems[j].index]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2203 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2204 fprintf (stderr, "\n context:"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2205 if (context ^ CTX_ANY) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2206 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2207 if (context & CTX_NONE) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2208 fprintf (stderr, " CTX_NONE"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2209 if (context & CTX_LETTER) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2210 fprintf (stderr, " CTX_LETTER"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2211 if (context & CTX_NEWLINE) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2212 fprintf (stderr, " CTX_NEWLINE"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2213 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2214 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2215 fprintf (stderr, " CTX_ANY"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2216 fprintf (stderr, "\n"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2217 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2218 |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2219 for (state_num j = 0; j < s->nelem; j++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2220 { |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2221 int c = d->constraints[s->elems[j].index]; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2222 |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2223 if (c != 0) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2224 { |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
2225 if (succeeds_in_context (c, context, CTX_ANY)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2226 constraint |= c; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2227 if (!first_end) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2228 first_end = d->tokens[s->elems[j].index]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2229 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2230 else if (d->tokens[s->elems[j].index] == BACKREF) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2231 constraint = NO_CONSTRAINT; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2232 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2233 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2234 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2235 /* Create a new state. */ |
18559
900819251d51
dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents:
18558
diff
changeset
|
2236 d->states = maybe_realloc (d->states, d->sindex, &d->salloc, -1, |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2237 sizeof *d->states); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2238 d->states[i].hash = hash; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2239 alloc_position_set (&d->states[i].elems, s->nelem); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2240 copy (s, &d->states[i].elems); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2241 d->states[i].context = context; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2242 d->states[i].constraint = constraint; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2243 d->states[i].first_end = first_end; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2244 d->states[i].mbps.nelem = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2245 d->states[i].mbps.elems = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2246 d->states[i].mb_trindex = -1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2247 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2248 ++d->sindex; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2249 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2250 return i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2251 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2252 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2253 /* Find the epsilon closure of a set of positions. If any position of the set |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2254 contains a symbol that matches the empty string in some context, replace |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2255 that position with the elements of its follow labeled with an appropriate |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2256 constraint. Repeat exhaustively until no funny positions are left. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2257 S->elems must be large enough to hold the result. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2258 static void |
39953
2f4c84e23e3c
dfa: remove unneeded code
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39862
diff
changeset
|
2259 epsclosure (struct dfa const *d) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2260 { |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2261 position_set tmp; |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2262 alloc_position_set (&tmp, d->nleaves); |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2263 for (size_t i = 0; i < d->tindex; ++i) |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2264 if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2265 && d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2266 && d->tokens[i] != MBCSET && d->tokens[i] < CSET) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2267 { |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2268 unsigned int constraint; |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2269 switch (d->tokens[i]) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2270 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2271 case BEGLINE: |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2272 constraint = BEGLINE_CONSTRAINT; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2273 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2274 case ENDLINE: |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2275 constraint = ENDLINE_CONSTRAINT; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2276 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2277 case BEGWORD: |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2278 constraint = BEGWORD_CONSTRAINT; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2279 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2280 case ENDWORD: |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2281 constraint = ENDWORD_CONSTRAINT; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2282 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2283 case LIMWORD: |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2284 constraint = LIMWORD_CONSTRAINT; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2285 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2286 case NOTLIMWORD: |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2287 constraint = NOTLIMWORD_CONSTRAINT; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2288 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2289 default: |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2290 constraint = NO_CONSTRAINT; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2291 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2292 } |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2293 |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2294 delete (i, &d->follows[i]); |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2295 |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2296 for (size_t j = 0; j < d->tindex; j++) |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2297 if (i != j && d->follows[j].nelem > 0) |
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2298 replace (&d->follows[j], i, &d->follows[i], constraint, &tmp); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2299 } |
18679
a68d8ef26d2a
dfa: fix memory leak in parse
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18673
diff
changeset
|
2300 free (tmp.elems); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2301 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2302 |
40047
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2303 /* Returns the set of contexts for which there is at least one |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2304 character included in C. */ |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2305 |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2306 static int |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2307 charclass_context (struct dfa const *dfa, charclass const *c) |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2308 { |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2309 int context = 0; |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2310 |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2311 for (unsigned int j = 0; j < CHARCLASS_WORDS; ++j) |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2312 { |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2313 if (c->w[j] & dfa->syntax.newline.w[j]) |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2314 context |= CTX_NEWLINE; |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2315 if (c->w[j] & dfa->syntax.letters.w[j]) |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2316 context |= CTX_LETTER; |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2317 if (c->w[j] & ~(dfa->syntax.letters.w[j] | dfa->syntax.newline.w[j])) |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2318 context |= CTX_NONE; |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2319 } |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2320 |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2321 return context; |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2322 } |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
2323 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2324 /* Returns the contexts on which the position set S depends. Each context |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2325 in the set of returned contexts (let's call it SC) may have a different |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2326 follow set than other contexts in SC, and also different from the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2327 follow set of the complement set (sc ^ CTX_ANY). However, all contexts |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2328 in the complement set will have the same follow set. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2329 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2330 static int _GL_ATTRIBUTE_PURE |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2331 state_separate_contexts (struct dfa *d, position_set const *s) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2332 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2333 int separate_contexts = 0; |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2334 |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2335 for (size_t j = 0; j < s->nelem; j++) |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2336 separate_contexts |= d->separates[s->elems[j].index]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2337 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2338 return separate_contexts; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2339 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2340 |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
/* Per-token flag bits used while optimizing the NFA.  Each constant
   occupies its own bit so that several can be combined in one flag
   byte.  */
enum
{
  /* The token is followed by itself, i.e., a single token is repeated.
     Repeated and non-repeated tokens are kept distinct.  */
  OPT_REPEAT = 1 << 0,

  /* The token heads a repeated group of several tokens.  Such a node
     is never merged.  */
  OPT_LPAREN = 1 << 1,

  /* Several branches join at this token.  Such a node is never
     merged.  */
  OPT_RPAREN = 1 << 2,

  /* The token has already been reached once during the walk; reaching
     it a second time turns on OPT_RPAREN.  */
  OPT_WALKED = 1 << 3,

  /* The token is queued for processing and must not be queued
     again.  */
  OPT_QUEUED = 1 << 4
};
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2360 |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2361 static void |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2362 merge_nfa_state (struct dfa *d, size_t tindex, char *flags, |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2363 position_set *merged) |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2364 { |
39858
f1a9693c37be
dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents:
39857
diff
changeset
|
2365 position_set *follows = d->follows; |
f1a9693c37be
dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents:
39857
diff
changeset
|
2366 ptrdiff_t nelem = 0; |
f1a9693c37be
dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents:
39857
diff
changeset
|
2367 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2368 d->constraints[tindex] = 0; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2369 |
39858
f1a9693c37be
dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents:
39857
diff
changeset
|
2370 for (ptrdiff_t i = 0; i < follows[tindex].nelem; i++) |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2371 { |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2372 size_t sindex = follows[tindex].elems[i].index; |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2373 |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2374 /* Skip the node as pruned in future. */ |
39858
f1a9693c37be
dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents:
39857
diff
changeset
|
2375 unsigned int iconstraint = follows[tindex].elems[i].constraint; |
f1a9693c37be
dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents:
39857
diff
changeset
|
2376 if (iconstraint == 0) |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2377 continue; |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2378 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2379 if (d->tokens[follows[tindex].elems[i].index] <= END) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2380 { |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2381 d->constraints[tindex] |= follows[tindex].elems[i].constraint; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2382 continue; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2383 } |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2384 |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2385 if (!(flags[sindex] & (OPT_LPAREN | OPT_RPAREN))) |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2386 { |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2387 ptrdiff_t j; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2388 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2389 for (j = 0; j < nelem; j++) |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2390 { |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2391 size_t dindex = follows[tindex].elems[j].index; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2392 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2393 if (follows[tindex].elems[j].constraint != iconstraint) |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2394 continue; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2395 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2396 if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN)) |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2397 continue; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2398 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2399 if (d->tokens[sindex] != d->tokens[dindex]) |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2400 continue; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2401 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2402 if ((flags[sindex] ^ flags[dindex]) & OPT_REPEAT) |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2403 continue; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2404 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2405 if (flags[sindex] & OPT_REPEAT) |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2406 delete (sindex, &follows[sindex]); |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2407 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2408 merge2 (&follows[dindex], &follows[sindex], merged); |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2409 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2410 break; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2411 } |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2412 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2413 if (j < nelem) |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2414 continue; |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2415 } |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2416 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2417 follows[tindex].elems[nelem++] = follows[tindex].elems[i]; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2418 flags[sindex] |= OPT_QUEUED; |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2419 } |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2420 |
39858
f1a9693c37be
dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents:
39857
diff
changeset
|
2421 follows[tindex].nelem = nelem; |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2422 } |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2423 |
39957
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2424 static int |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2425 compare (const void *a, const void *b) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2426 { |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2427 int aindex; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2428 int bindex; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2429 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2430 aindex = (int) ((position *) a)->index; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2431 bindex = (int) ((position *) b)->index; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2432 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2433 return aindex - bindex; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2434 } |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2435 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2436 static void |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2437 reorder_tokens (struct dfa *d) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2438 { |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2439 ptrdiff_t nleaves; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2440 ptrdiff_t *map; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2441 token *tokens; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2442 position_set *follows; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2443 int *constraints; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2444 char *multibyte_prop; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2445 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2446 nleaves = 0; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2447 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2448 map = xnmalloc (d->tindex, sizeof *map); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2449 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2450 map[0] = nleaves++; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2451 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2452 for (ptrdiff_t i = 1; i < d->tindex; i++) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2453 map[i] = -1; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2454 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2455 tokens = xnmalloc (d->nleaves, sizeof *tokens); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2456 follows = xnmalloc (d->nleaves, sizeof *follows); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2457 constraints = xnmalloc (d->nleaves, sizeof *constraints); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2458 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2459 if (d->localeinfo.multibyte) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2460 multibyte_prop = xnmalloc (d->nleaves, sizeof *multibyte_prop); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2461 else |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2462 multibyte_prop = NULL; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2463 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2464 for (ptrdiff_t i = 0; i < d->tindex; i++) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2465 { |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2466 if (map[i] == -1) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2467 { |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2468 free (d->follows[i].elems); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2469 d->follows[i].elems = NULL; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2470 d->follows[i].nelem = 0; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2471 continue; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2472 } |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2473 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2474 tokens[map[i]] = d->tokens[i]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2475 follows[map[i]] = d->follows[i]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2476 constraints[map[i]] = d->constraints[i]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2477 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2478 if (multibyte_prop != NULL) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2479 multibyte_prop[map[i]] = d->multibyte_prop[i]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2480 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2481 for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2482 { |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2483 if (map[d->follows[i].elems[j].index] == -1) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2484 map[d->follows[i].elems[j].index] = nleaves++; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2485 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2486 d->follows[i].elems[j].index = map[d->follows[i].elems[j].index]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2487 } |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2488 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2489 qsort (d->follows[i].elems, d->follows[i].nelem, |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2490 sizeof *d->follows[i].elems, compare); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2491 } |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2492 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2493 for (ptrdiff_t i = 0; i < nleaves; i++) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2494 { |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2495 d->tokens[i] = tokens[i]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2496 d->follows[i] = follows[i]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2497 d->constraints[i] = constraints[i]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2498 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2499 if (multibyte_prop != NULL) |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2500 d->multibyte_prop[i] = multibyte_prop[i]; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2501 } |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2502 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2503 d->tindex = d->nleaves = nleaves; |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2504 |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2505 free (tokens); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2506 free (follows); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2507 free (constraints); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2508 free (multibyte_prop); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2509 free (map); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2510 } |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2511 |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2512 static void |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2513 dfaoptimize (struct dfa *d) |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2514 { |
39859
1f2a63e46815
dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
39858
diff
changeset
|
2515 char *flags; |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2516 position_set merged0; |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2517 position_set *merged; |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2518 |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2519 flags = xmalloc (d->tindex * sizeof *flags); |
39859
1f2a63e46815
dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
39858
diff
changeset
|
2520 memset (flags, 0, d->tindex * sizeof *flags); |
1f2a63e46815
dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
39858
diff
changeset
|
2521 |
1f2a63e46815
dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
39858
diff
changeset
|
2522 for (size_t i = 0; i < d->tindex; i++) |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2523 { |
39859
1f2a63e46815
dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
39858
diff
changeset
|
2524 for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++) |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2525 { |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2526 if (d->follows[i].elems[j].index == i) |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2527 flags[d->follows[i].elems[j].index] |= OPT_REPEAT; |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2528 else if (d->follows[i].elems[j].index < i) |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2529 flags[d->follows[i].elems[j].index] |= OPT_LPAREN; |
39859
1f2a63e46815
dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
39858
diff
changeset
|
2530 else if (flags[d->follows[i].elems[j].index] &= OPT_WALKED) |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2531 flags[d->follows[i].elems[j].index] |= OPT_RPAREN; |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2532 else |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2533 flags[d->follows[i].elems[j].index] |= OPT_WALKED; |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2534 } |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2535 } |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2536 |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2537 flags[0] |= OPT_QUEUED; |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2538 |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2539 merged = &merged0; |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2540 alloc_position_set (merged, d->nleaves); |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2541 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2542 d->constraints = xnmalloc (d->tindex, sizeof *d->constraints); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2543 |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2544 for (ptrdiff_t i = 0; i < d->tindex; i++) |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2545 if (flags[i] & OPT_QUEUED) |
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2546 merge_nfa_state (d, i, flags, merged); |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2547 |
39957
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2548 reorder_tokens (d); |
6d6c0b94693c
dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39956
diff
changeset
|
2549 |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2550 free (merged->elems); |
39955
7c568600d07f
dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39954
diff
changeset
|
2551 free (flags); |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2552 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2553 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2554 /* Perform bottom-up analysis on the parse tree, computing various functions. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2555 Note that at this point, we're pretending constructs like \< are real |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2556 characters rather than constraints on what can follow them. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2557 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2558 Nullable: A node is nullable if it is at the root of a regexp that can |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2559 match the empty string. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2560 * EMPTY leaves are nullable. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2561 * No other leaf is nullable. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2562 * A QMARK or STAR node is nullable. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2563 * A PLUS node is nullable if its argument is nullable. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2564 * A CAT node is nullable if both its arguments are nullable. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2565 * An OR node is nullable if either argument is nullable. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2566 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2567 Firstpos: The firstpos of a node is the set of positions (nonempty leaves) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2568 that could correspond to the first character of a string matching the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2569 regexp rooted at the given node. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2570 * EMPTY leaves have empty firstpos. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2571 * The firstpos of a nonempty leaf is that leaf itself. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2572 * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2573 argument. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2574 * The firstpos of a CAT node is the firstpos of the left argument, union |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2575 the firstpos of the right if the left argument is nullable. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2576 * The firstpos of an OR node is the union of firstpos of each argument. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2577 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2578 Lastpos: The lastpos of a node is the set of positions that could |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2579 correspond to the last character of a string matching the regexp at |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2580 the given node. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2581 * EMPTY leaves have empty lastpos. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2582 * The lastpos of a nonempty leaf is that leaf itself. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2583 * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2584 argument. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2585 * The lastpos of a CAT node is the lastpos of its right argument, union |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2586 the lastpos of the left if the right argument is nullable. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2587 * The lastpos of an OR node is the union of the lastpos of each argument. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2588 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2589 Follow: The follow of a position is the set of positions that could |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2590 correspond to the character following a character matching the node in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2591 a string matching the regexp. At this point we consider special symbols |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2592 that match the empty string in some context to be just normal characters. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2593 Later, if we find that a special symbol is in a follow set, we will |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2594 replace it with the elements of its follow, labeled with an appropriate |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2595 constraint. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2596 * Every node in the firstpos of the argument of a STAR or PLUS node is in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2597 the follow of every node in the lastpos. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2598 * Every node in the firstpos of the second argument of a CAT node is in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2599 the follow of every node in the lastpos of the first argument. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2600 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2601 Because of the postfix representation of the parse tree, the depth-first |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2602 analysis is conveniently done by a linear scan with the aid of a stack. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2603 Sets are stored as arrays of the elements, obeying a stack-like allocation |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2604 scheme; the number of elements in each set deeper in the stack can be |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2605 used to determine the address of a particular set's array. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2606 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2607 dfaanalyze (struct dfa *d, bool searchflag) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2608 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2609 /* Array allocated to hold position sets. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2610 position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2611 /* Firstpos and lastpos elements. */ |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2612 position *firstpos = posalloc; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2613 position *lastpos = firstpos + d->nleaves; |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2614 position pos; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2615 position_set tmp; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2616 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2617 /* Stack for element counts and nullable flags. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2618 struct |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2619 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2620 /* Whether the entry is nullable. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2621 bool nullable; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2622 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2623 /* Counts of firstpos and lastpos sets. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2624 size_t nfirstpos; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2625 size_t nlastpos; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2626 } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2627 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2628 position_set merged; /* Result of merging sets. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2629 |
39855
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
2630 addtok (d, CAT); |
a29036ff511d
dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39760
diff
changeset
|
2631 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2632 #ifdef DEBUG |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2633 fprintf (stderr, "dfaanalyze:\n"); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
2634 for (size_t i = 0; i < d->tindex; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2635 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2636 fprintf (stderr, " %zu:", i); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2637 prtok (d->tokens[i]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2638 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2639 putc ('\n', stderr); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2640 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2641 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2642 d->searchflag = searchflag; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2643 alloc_position_set (&merged, d->nleaves); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2644 d->follows = xcalloc (d->tindex, sizeof *d->follows); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2645 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
2646 for (size_t i = 0; i < d->tindex; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2647 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2648 switch (d->tokens[i]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2649 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2650 case EMPTY: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2651 /* The empty leaf is nullable. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2652 stk->nullable = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2653 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2654 /* The firstpos and lastpos of the empty leaf are both empty. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2655 stk->nfirstpos = stk->nlastpos = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2656 stk++; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2657 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2658 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2659 case STAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2660 case PLUS: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2661 /* Every element in the firstpos of the argument is in the follow |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2662 of every element in the lastpos. */ |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2663 { |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2664 tmp.elems = firstpos - stk[-1].nfirstpos; |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2665 tmp.nelem = stk[-1].nfirstpos; |
40026
c51e38088432
dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents:
39958
diff
changeset
|
2666 position *p = lastpos - stk[-1].nlastpos; |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2667 for (size_t j = 0; j < stk[-1].nlastpos; j++) |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2668 { |
40026
c51e38088432
dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents:
39958
diff
changeset
|
2669 merge (&tmp, &d->follows[p[j].index], &merged); |
c51e38088432
dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents:
39958
diff
changeset
|
2670 copy (&merged, &d->follows[p[j].index]); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2671 } |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2672 } |
18914
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
2673 FALLTHROUGH; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2674 case QMARK: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2675 /* A QMARK or STAR node is automatically nullable. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2676 if (d->tokens[i] != PLUS) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2677 stk[-1].nullable = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2678 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2679 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2680 case CAT: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2681 /* Every element in the firstpos of the second argument is in the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2682 follow of every element in the lastpos of the first argument. */ |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2683 { |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2684 tmp.nelem = stk[-1].nfirstpos; |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2685 tmp.elems = firstpos - stk[-1].nfirstpos; |
40026
c51e38088432
dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents:
39958
diff
changeset
|
2686 position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos; |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2687 for (size_t j = 0; j < stk[-2].nlastpos; j++) |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2688 { |
40026
c51e38088432
dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents:
39958
diff
changeset
|
2689 merge (&tmp, &d->follows[p[j].index], &merged); |
c51e38088432
dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents:
39958
diff
changeset
|
2690 copy (&merged, &d->follows[p[j].index]); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2691 } |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2692 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2693 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2694 /* The firstpos of a CAT node is the firstpos of the first argument, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2695 union that of the second argument if the first is nullable. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2696 if (stk[-2].nullable) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2697 stk[-2].nfirstpos += stk[-1].nfirstpos; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2698 else |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2699 firstpos -= stk[-1].nfirstpos; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2700 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2701 /* The lastpos of a CAT node is the lastpos of the second argument, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2702 union that of the first argument if the second is nullable. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2703 if (stk[-1].nullable) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2704 stk[-2].nlastpos += stk[-1].nlastpos; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2705 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2706 { |
40026
c51e38088432
dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents:
39958
diff
changeset
|
2707 position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos; |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2708 for (size_t j = 0; j < stk[-1].nlastpos; j++) |
40026
c51e38088432
dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents:
39958
diff
changeset
|
2709 p[j] = p[j + stk[-2].nlastpos]; |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2710 lastpos -= stk[-2].nlastpos; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2711 stk[-2].nlastpos = stk[-1].nlastpos; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2712 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2713 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2714 /* A CAT node is nullable if both arguments are nullable. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2715 stk[-2].nullable &= stk[-1].nullable; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2716 stk--; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2717 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2718 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2719 case OR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2720 /* The firstpos is the union of the firstpos of each argument. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2721 stk[-2].nfirstpos += stk[-1].nfirstpos; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2722 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2723 /* The lastpos is the union of the lastpos of each argument. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2724 stk[-2].nlastpos += stk[-1].nlastpos; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2725 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2726 /* An OR node is nullable if either argument is nullable. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2727 stk[-2].nullable |= stk[-1].nullable; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2728 stk--; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2729 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2730 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2731 default: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2732 /* Anything else is a nonempty position. (Note that special |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2733 constructs like \< are treated as nonempty strings here; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2734 an "epsilon closure" effectively makes them nullable later. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2735 Backreferences have to get a real position so we can detect |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2736 transitions on them later. But they are nullable.) */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2737 stk->nullable = d->tokens[i] == BACKREF; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2738 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2739 /* This position is in its own firstpos and lastpos. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2740 stk->nfirstpos = stk->nlastpos = 1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2741 stk++; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2742 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2743 firstpos->index = lastpos->index = i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2744 firstpos->constraint = lastpos->constraint = NO_CONSTRAINT; |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2745 firstpos++, lastpos++; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2746 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2747 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2748 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2749 #ifdef DEBUG |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2750 /* ... balance the above nonsyntactic #ifdef goo... */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2751 fprintf (stderr, "node %zu:", i); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2752 prtok (d->tokens[i]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2753 putc ('\n', stderr); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2754 fprintf (stderr, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2755 stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2756 fprintf (stderr, " firstpos:"); |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2757 for (size_t j = 0; j < stk[-1].nfirstpos; j++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2758 { |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2759 fprintf (stderr, " %zu:", firstpos[j - stk[-1].nfirstpos].index); |
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2760 prtok (d->tokens[firstpos[j - stk[-1].nfirstpos].index]); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2761 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2762 fprintf (stderr, "\n lastpos:"); |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2763 for (size_t j = 0; j < stk[-1].nlastpos; j++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2764 { |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2765 fprintf (stderr, " %zu:", lastpos[j - stk[-1].nlastpos].index); |
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2766 prtok (d->tokens[lastpos[j - stk[-1].nlastpos].index]); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2767 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2768 putc ('\n', stderr); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2769 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2770 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2771 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2772 /* For each follow set that is the follow set of a real position, replace |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2773 it with its epsilon closure. */ |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2774 epsclosure (d); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2775 |
39856
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2776 dfaoptimize (d); |
469c01483bf1
dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39855
diff
changeset
|
2777 |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2778 #ifdef DEBUG |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
2779 for (size_t i = 0; i < d->tindex; ++i) |
39953
2f4c84e23e3c
dfa: remove unneeded code
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39862
diff
changeset
|
2780 if (d->tokens[i] == BEG || d->tokens[i] < NOTCHAR |
2f4c84e23e3c
dfa: remove unneeded code
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39862
diff
changeset
|
2781 || d->tokens[i] == BACKREF || d->tokens[i] == ANYCHAR |
2f4c84e23e3c
dfa: remove unneeded code
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39862
diff
changeset
|
2782 || d->tokens[i] == MBCSET || d->tokens[i] >= CSET) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2783 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2784 fprintf (stderr, "follows(%zu:", i); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2785 prtok (d->tokens[i]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2786 fprintf (stderr, "):"); |
39954
b6666dd9d140
dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39953
diff
changeset
|
2787 for (size_t j = 0; j < d->follows[i].nelem; j++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2788 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2789 fprintf (stderr, " %zu:", d->follows[i].elems[j].index); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2790 prtok (d->tokens[d->follows[i].elems[j].index]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2791 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2792 putc ('\n', stderr); |
18607
db280259d3cc
dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18560
diff
changeset
|
2793 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2794 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2795 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2796 pos.index = 0; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2797 pos.constraint = NO_CONSTRAINT; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2798 |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2799 alloc_position_set (&tmp, 1); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2800 |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2801 append (pos, &tmp); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2802 |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2803 d->separates = xnmalloc (d->tindex, sizeof *d->separates); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2804 |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2805 for (ptrdiff_t i = 0; i < d->tindex; i++) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2806 { |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2807 d->separates[i] = 0; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2808 |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2809 if (prev_newline_dependent (d->constraints[i])) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2810 d->separates[i] |= CTX_NEWLINE; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2811 if (prev_letter_dependent (d->constraints[i])) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2812 d->separates[i] |= CTX_LETTER; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2813 |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2814 for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2815 { |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2816 if (prev_newline_dependent (d->follows[i].elems[j].constraint)) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2817 d->separates[i] |= CTX_NEWLINE; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2818 if (prev_letter_dependent (d->follows[i].elems[j].constraint)) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2819 d->separates[i] |= CTX_LETTER; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2820 } |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2821 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2822 |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2823 /* Context wanted by some position. */ |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2824 int separate_contexts = state_separate_contexts (d, &tmp); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
2825 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2826 /* Build the initial state. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2827 if (separate_contexts & CTX_NEWLINE) |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2828 state_index (d, &tmp, CTX_NEWLINE); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2829 d->initstate_notbol = d->min_trcount |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2830 = state_index (d, &tmp, separate_contexts ^ CTX_ANY); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2831 if (separate_contexts & CTX_LETTER) |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2832 d->min_trcount = state_index (d, &tmp, CTX_LETTER); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2833 d->min_trcount++; |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2834 d->trcount = 0; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2835 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2836 free (posalloc); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2837 free (stkalloc); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2838 free (merged.elems); |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2839 free (tmp.elems); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2840 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2841 |
18658
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2842 /* Make sure D's state arrays are large enough to hold all D->sindex states. */ |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2843 static void |
18659
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
2844 realloc_trans_if_necessary (struct dfa *d) |
18658
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2845 { |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2846 state_num oldalloc = d->tralloc; |
18659
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
2847 if (oldalloc < d->sindex) |
18658
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2848 { |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2849 state_num **realtrans = d->trans ? d->trans - 2 : NULL; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2850 ptrdiff_t newalloc1 = realtrans ? d->tralloc + 2 : 0; |
18659
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
2851 realtrans = xpalloc (realtrans, &newalloc1, d->sindex - oldalloc, |
18658
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2852 -1, sizeof *realtrans); |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2853 realtrans[0] = realtrans[1] = NULL; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2854 d->trans = realtrans + 2; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2855 ptrdiff_t newalloc = d->tralloc = newalloc1 - 2; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2856 d->fails = xnrealloc (d->fails, newalloc, sizeof *d->fails); |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2857 d->success = xnrealloc (d->success, newalloc, sizeof *d->success); |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2858 d->newlines = xnrealloc (d->newlines, newalloc, sizeof *d->newlines); |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2859 if (d->localeinfo.multibyte) |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2860 { |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2861 realtrans = d->mb_trans ? d->mb_trans - 2 : NULL; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2862 realtrans = xnrealloc (realtrans, newalloc1, sizeof *realtrans); |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2863 if (oldalloc == 0) |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2864 realtrans[0] = realtrans[1] = NULL; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2865 d->mb_trans = realtrans + 2; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2866 } |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2867 for (; oldalloc < newalloc; oldalloc++) |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2868 { |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2869 d->trans[oldalloc] = NULL; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2870 d->fails[oldalloc] = NULL; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2871 if (d->localeinfo.multibyte) |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2872 d->mb_trans[oldalloc] = NULL; |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2873 } |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2874 } |
384886b3e35b
dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents:
18651
diff
changeset
|
2875 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2876 |
18660
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2877 /* |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2878 Calculate the transition table for a new state derived from state s |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2879 for a compiled dfa d after input character uc, and return the new |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2880 state number. |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2881 |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2882 Do not worry about all possible input characters; calculate just the group |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2883 of positions that match uc. Label it with the set of characters that |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2884 every position in the group matches (taking into account, if necessary, |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2885 preceding context information of s). Then find the union |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2886 of these positions' follows, i.e., the set of positions of the |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2887 new state. For each character in the group's label, set the transition |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2888 on this character to be to a state corresponding to the set's positions, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2889 and its associated backward context information, if necessary. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2890 |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2891 When building a searching matcher, include the positions of state |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2892 0 in every state. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2893 |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2894 The group is constructed by building an equivalence-class |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2895 partition of the positions of s. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2896 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2897 For each position, find the set of characters C that it matches. Eliminate |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2898 any characters from C that fail on grounds of backward context. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2899 |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2900 Check whether the group's label L has nonempty |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2901 intersection with C. If L - C is nonempty, create a new group labeled |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2902 L - C and having the same positions as the current group, and set L to |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2903 the intersection of L and C. Insert the position in the group, set |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2904 C = C - L, and resume scanning. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2905 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2906 If after comparing with every group there are characters remaining in C, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2907 create a new group labeled with the characters of C and insert this |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2908 position in that group. */ |
18660
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2909 |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2910 static state_num |
18660
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2911 build_state (state_num s, struct dfa *d, unsigned char uc) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2912 { |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2913 position_set follows; /* Union of the follows for each |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2914 position of the current state. */ |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2915 position_set group; /* Positions that match the input char. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2916 position_set tmp; /* Temporary space for merging sets. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2917 state_num state; /* New state. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2918 state_num state_newline; /* New state on a newline transition. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2919 state_num state_letter; /* New state on a letter transition. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2920 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2921 #ifdef DEBUG |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2922 fprintf (stderr, "build state %td\n", s); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2923 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2924 |
18660
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2925 /* A pointer to the new transition table, and the table itself. */ |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2926 state_num **ptrans = (accepting (s, d) ? d->fails : d->trans) + s; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2927 state_num *trans = *ptrans; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2928 |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2929 if (!trans) |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2930 { |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2931 /* MAX_TRCOUNT is an arbitrary upper limit on the number of |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2932 transition tables that can exist at once, other than for |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2933 initial states. Often-used transition tables are quickly |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2934 rebuilt, whereas rarely-used ones are cleared away. */ |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2935 if (MAX_TRCOUNT <= d->trcount) |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2936 { |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2937 for (state_num i = d->min_trcount; i < d->tralloc; i++) |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2938 { |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2939 free (d->trans[i]); |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2940 free (d->fails[i]); |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2941 d->trans[i] = d->fails[i] = NULL; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2942 } |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2943 d->trcount = 0; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2944 } |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2945 |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2946 d->trcount++; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2947 *ptrans = trans = xmalloc (NOTCHAR * sizeof *trans); |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2948 |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2949 /* Fill transition table with a default value which means that the |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2950 transited state has not been calculated yet. */ |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2951 for (int i = 0; i < NOTCHAR; i++) |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2952 trans[i] = -2; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2953 } |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2954 |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2955 /* Set up the success bits for this state. */ |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2956 d->success[s] = 0; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2957 if (accepts_in_context (d->states[s].context, CTX_NEWLINE, s, d)) |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2958 d->success[s] |= CTX_NEWLINE; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2959 if (accepts_in_context (d->states[s].context, CTX_LETTER, s, d)) |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2960 d->success[s] |= CTX_LETTER; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2961 if (accepts_in_context (d->states[s].context, CTX_NONE, s, d)) |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2962 d->success[s] |= CTX_NONE; |
9812ab19bd35
dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18659
diff
changeset
|
2963 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2964 alloc_position_set (&follows, d->nleaves); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2965 |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2966 /* Find the union of the follows of the positions of the group. |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2967 This is a hideously inefficient loop. Fix it someday. */ |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2968 for (size_t j = 0; j < d->states[s].elems.nelem; ++j) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2969 for (size_t k = 0; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2970 k < d->follows[d->states[s].elems.elems[j].index].nelem; ++k) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2971 insert (d->follows[d->states[s].elems.elems[j].index].elems[k], |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2972 &follows); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2973 |
18633
42cabb9832cd
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18632
diff
changeset
|
2974 /* Positions that match the input char. */ |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2975 alloc_position_set (&group, d->nleaves); |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2976 |
18633
42cabb9832cd
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18632
diff
changeset
|
2977 /* The group's label. */ |
42cabb9832cd
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18632
diff
changeset
|
2978 charclass label; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
2979 fillset (&label); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2980 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2981 for (size_t i = 0; i < follows.nelem; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2982 { |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
2983 charclass matches; /* Set of matching characters. */ |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
2984 position pos = follows.elems[i]; |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2985 bool matched = false; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2986 if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR) |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2987 { |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
2988 zeroset (&matches); |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
2989 setbit (d->tokens[pos.index], &matches); |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2990 if (d->tokens[pos.index] == uc) |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2991 matched = true; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2992 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2993 else if (d->tokens[pos.index] >= CSET) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
2994 { |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
2995 matches = d->charclasses[d->tokens[pos.index] - CSET]; |
18931
6daf1ec75a2e
dfa: two small simplifications
Jim Meyering <meyering@fb.com>
parents:
18914
diff
changeset
|
2996 if (tstbit (uc, &matches)) |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2997 matched = true; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
2998 } |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
2999 else if (d->tokens[pos.index] == ANYCHAR) |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3000 { |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3001 matches = d->charclasses[d->canychar]; |
18931
6daf1ec75a2e
dfa: two small simplifications
Jim Meyering <meyering@fb.com>
parents:
18914
diff
changeset
|
3002 if (tstbit (uc, &matches)) |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3003 matched = true; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3004 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3005 /* ANYCHAR must match with a single character, so we must put |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3006 it to D->states[s].mbps which contains the positions which |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3007 can match with a single character not a byte. If all |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3008 positions which have ANYCHAR do not depend on the context of |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3009 the next character, we put the follows instead of it into |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3010 D->states[s].mbps to optimize. */ |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
3011 if (succeeds_in_context (pos.constraint, d->states[s].context, |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3012 CTX_NONE)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3013 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3014 if (d->states[s].mbps.nelem == 0) |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3015 alloc_position_set (&d->states[s].mbps, 1); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3016 insert (pos, &d->states[s].mbps); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3017 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3018 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3019 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3020 continue; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3021 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3022 /* Some characters may need to be eliminated from matches because |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3023 they fail in the current context. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3024 if (pos.constraint != NO_CONSTRAINT) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3025 { |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
3026 if (!succeeds_in_context (pos.constraint, |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3027 d->states[s].context, CTX_NEWLINE)) |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3028 for (size_t j = 0; j < CHARCLASS_WORDS; ++j) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3029 matches.w[j] &= ~d->syntax.newline.w[j]; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
3030 if (!succeeds_in_context (pos.constraint, |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3031 d->states[s].context, CTX_LETTER)) |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3032 for (size_t j = 0; j < CHARCLASS_WORDS; ++j) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3033 matches.w[j] &= ~d->syntax.letters.w[j]; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
3034 if (!succeeds_in_context (pos.constraint, |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3035 d->states[s].context, CTX_NONE)) |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3036 for (size_t j = 0; j < CHARCLASS_WORDS; ++j) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3037 matches.w[j] &= d->syntax.letters.w[j] | d->syntax.newline.w[j]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3038 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3039 /* If there are no characters left, there's no point in going on. */ |
18668
1fe5f10b4b1c
dfa: minor simplification with emptyset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18667
diff
changeset
|
3040 if (emptyset (&matches)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3041 continue; |
18535
9c210050a97b
dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents:
18534
diff
changeset
|
3042 |
9c210050a97b
dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents:
18534
diff
changeset
|
3043 /* If we have reset the bit that made us declare "matched", reset |
9c210050a97b
dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents:
18534
diff
changeset
|
3044 that indicator, too. This is required to avoid an infinite loop |
9c210050a97b
dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents:
18534
diff
changeset
|
3045 with this command: echo cx | LC_ALL=C grep -E 'c\b[x ]' */ |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3046 if (!tstbit (uc, &matches)) |
18535
9c210050a97b
dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents:
18534
diff
changeset
|
3047 matched = false; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3048 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3049 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3050 #ifdef DEBUG |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3051 fprintf (stderr, " nextpos %zu:", pos.index); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3052 prtok (d->tokens[pos.index]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3053 fprintf (stderr, " of"); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3054 for (size_t j = 0; j < NOTCHAR; j++) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3055 if (tstbit (j, &matches)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3056 fprintf (stderr, " 0x%02zx", j); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3057 fprintf (stderr, "\n"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3058 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3059 |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3060 if (matched) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3061 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3062 for (size_t k = 0; k < CHARCLASS_WORDS; ++k) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3063 label.w[k] &= matches.w[k]; |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3064 append (pos, &group); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3065 } |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3066 else |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3067 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3068 for (size_t k = 0; k < CHARCLASS_WORDS; ++k) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3069 label.w[k] &= ~matches.w[k]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3070 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3071 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3072 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3073 alloc_position_set (&tmp, d->nleaves); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3074 |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3075 if (group.nelem > 0) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3076 { |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3077 /* If we are building a searching matcher, throw in the positions |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3078 of state 0 as well, if possible. */ |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3079 if (d->searchflag) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3080 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3081 /* If a token in follows.elems is not 1st byte of a multibyte |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3082 character, or the states of follows must accept the bytes |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3083 which are not 1st byte of the multibyte character. |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3084 Then, if a state of follows encounters a byte, it must not be |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3085 a 1st byte of a multibyte character nor a single byte character. |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3086 In this case, do not add state[0].follows to next state, because |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3087 state[0] must accept 1st-byte. |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3088 |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3089 For example, suppose <sb a> is a certain single byte character, |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3090 <mb A> is a certain multibyte character, and the codepoint of |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3091 <sb a> equals the 2nd byte of the codepoint of <mb A>. When |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3092 state[0] accepts <sb a>, state[i] transits to state[i+1] by |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3093 accepting the 1st byte of <mb A>, and state[i+1] accepts the |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3094 2nd byte of <mb A>, if state[i+1] encounters the codepoint of |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3095 <sb a>, it must not be <sb a> but the 2nd byte of <mb A>, so do |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3096 not add state[0]. */ |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3097 |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3098 bool mergeit = !d->localeinfo.multibyte; |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3099 if (!mergeit) |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3100 { |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3101 mergeit = true; |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3102 for (size_t j = 0; mergeit && j < group.nelem; j++) |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3103 mergeit &= d->multibyte_prop[group.elems[j].index]; |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3104 } |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3105 if (mergeit) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3106 { |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3107 merge (&d->states[0].elems, &group, &tmp); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3108 copy (&tmp, &group); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3109 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3110 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3111 |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3112 /* Find out if the new state will want any context information, |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3113 by calculating possible contexts that the group can match, |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3114 and separate contexts that the new state wants to know. */ |
40047
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3115 int possible_contexts = charclass_context (d, &label); |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3116 int separate_contexts = state_separate_contexts (d, &group); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3117 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3118 /* Find the state(s) corresponding to the union of the follows. */ |
40047
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3119 if (possible_contexts & ~separate_contexts) |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3120 state = state_index (d, &group, separate_contexts ^ CTX_ANY); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3121 else |
40047
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3122 state = -1; |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3123 if (separate_contexts & possible_contexts & CTX_NEWLINE) |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3124 state_newline = state_index (d, &group, CTX_NEWLINE); |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3125 else |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3126 state_newline = state; |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3127 if (separate_contexts & possible_contexts & CTX_LETTER) |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3128 state_letter = state_index (d, &group, CTX_LETTER); |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3129 else |
183a2f6b0b16
revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents:
40026
diff
changeset
|
3130 state_letter = state; |
18659
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3131 |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3132 /* Reallocate now, to reallocate any newline transition properly. */ |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3133 realloc_trans_if_necessary (d); |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3134 } |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3135 |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3136 /* If we are a searching matcher, the default transition is to a state |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3137 containing the positions of state 0, otherwise the default transition |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3138 is to fail miserably. */ |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3139 else if (d->searchflag) |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3140 { |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3141 state_newline = 0; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3142 state_letter = d->min_trcount - 1; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3143 state = d->initstate_notbol; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3144 } |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3145 else |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3146 { |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3147 state_newline = -1; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3148 state_letter = -1; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3149 state = -1; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3150 } |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3151 |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3152 /* Set the transitions for each character in the label. */ |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3153 for (size_t i = 0; i < NOTCHAR; i++) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3154 if (tstbit (i, &label)) |
18659
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3155 switch (d->syntax.sbit[i]) |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3156 { |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3157 case CTX_NEWLINE: |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3158 trans[i] = state_newline; |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3159 break; |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3160 case CTX_LETTER: |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3161 trans[i] = state_letter; |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3162 break; |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3163 default: |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3164 trans[i] = state; |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3165 break; |
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3166 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3167 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3168 #ifdef DEBUG |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3169 fprintf (stderr, "trans table %td", s); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3170 for (size_t i = 0; i < NOTCHAR; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3171 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3172 if (!(i & 0xf)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3173 fprintf (stderr, "\n"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3174 fprintf (stderr, " %2td", trans[i]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3175 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3176 fprintf (stderr, "\n"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3177 #endif |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3178 |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3179 free (group.elems); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3180 free (follows.elems); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3181 free (tmp.elems); |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3182 |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3183 /* Keep the newline transition in a special place so we can use it as |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3184 a sentinel. */ |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3185 if (tstbit (d->syntax.eolbyte, &label)) |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3186 { |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3187 d->newlines[s] = trans[d->syntax.eolbyte]; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3188 trans[d->syntax.eolbyte] = -1; |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3189 } |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3190 |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3191 return trans[uc]; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3192 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3193 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3194 /* Multibyte character handling sub-routines for dfaexec. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3195 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3196 /* Consume a single byte at **PP, advance *PP, and return the state reached from 's'. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3197 This function is almost the same as the state transition routine in dfaexec. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3198 But the state transition is done just once, instead of looping until |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3199 matching succeeds or the end of the buffer is reached. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3200 static state_num |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3201 transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3202 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3203 state_num *t; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3204 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3205 if (d->trans[s]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3206 t = d->trans[s]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3207 else if (d->fails[s]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3208 t = d->fails[s]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3209 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3210 { |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3211 build_state (s, d, **pp); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3212 if (d->trans[s]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3213 t = d->trans[s]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3214 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3215 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3216 t = d->fails[s]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3217 assert (t); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3218 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3219 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3220 |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3221 if (t[**pp] == -2) |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3222 build_state (s, d, **pp); |
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3223 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3224 return t[*(*pp)++]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3225 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3226 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3227 /* Transit state from s, then return new state and update the pointer of |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3228 the buffer. This function is for a period operator which can match a |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3229 multi-byte character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3230 static state_num |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3231 transit_state (struct dfa *d, state_num s, unsigned char const **pp, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3232 unsigned char const *end) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3233 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3234 wint_t wc; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3235 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3236 int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3237 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3238 /* This state has some operators which can match a multibyte character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3239 d->mb_follows.nelem = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3240 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3241 /* Calculate the state which can be reached from the state 's' by |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3242 consuming 'mbclen' single bytes from the buffer. */ |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3243 state_num s1 = s; |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3244 int mbci; |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3245 for (mbci = 0; mbci < mbclen && (mbci == 0 || d->min_trcount <= s); mbci++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3246 s = transit_state_singlebyte (d, s, pp); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3247 *pp += mbclen - mbci; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3248 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3249 if (wc == WEOF) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3250 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3251 /* It is an invalid character, so ANYCHAR is not accepted. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3252 return s; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3253 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3254 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3255 /* If all positions which have ANYCHAR do not depend on the context |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3256 of the next character, calculate the next state with |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3257 pre-calculated follows and cache the result. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3258 if (d->states[s1].mb_trindex < 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3259 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3260 if (MAX_TRCOUNT <= d->mb_trcount) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3261 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3262 state_num s3; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3263 for (s3 = -1; s3 < d->tralloc; s3++) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3264 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3265 free (d->mb_trans[s3]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3266 d->mb_trans[s3] = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3267 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3268 |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3269 for (state_num i = 0; i < d->sindex; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3270 d->states[i].mb_trindex = -1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3271 d->mb_trcount = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3272 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3273 d->states[s1].mb_trindex = d->mb_trcount++; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3274 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3275 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3276 if (! d->mb_trans[s]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3277 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3278 enum { TRANSPTR_SIZE = sizeof *d->mb_trans[s] }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3279 enum { TRANSALLOC_SIZE = MAX_TRCOUNT * TRANSPTR_SIZE }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3280 d->mb_trans[s] = xmalloc (TRANSALLOC_SIZE); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3281 for (int i = 0; i < MAX_TRCOUNT; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3282 d->mb_trans[s][i] = -1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3283 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3284 else if (d->mb_trans[s][d->states[s1].mb_trindex] >= 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3285 return d->mb_trans[s][d->states[s1].mb_trindex]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3286 |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3287 if (s == -1) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3288 copy (&d->states[s1].mbps, &d->mb_follows); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3289 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3290 merge (&d->states[s1].mbps, &d->states[s].elems, &d->mb_follows); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3291 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3292 int separate_contexts = state_separate_contexts (d, &d->mb_follows); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3293 state_num s2 = state_index (d, &d->mb_follows, separate_contexts ^ CTX_ANY); |
18659
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3294 realloc_trans_if_necessary (d); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3295 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3296 d->mb_trans[s][d->states[s1].mb_trindex] = s2; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3297 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3298 return s2; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3299 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3300 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3301 /* The initial state may encounter a byte which is not a single byte character |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3302 nor the first byte of a multibyte character. But it is incorrect for the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3303 initial state to accept such a byte. For example, in Shift JIS the regular |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3304 expression "\\" accepts the codepoint 0x5c, but should not accept the second |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3305 byte of the codepoint 0x815c. Then the initial state must skip the bytes |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3306 that are not a single byte character nor the first byte of a multibyte |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3307 character. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3308 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3309 Given DFA state d, use mbs_to_wchar to advance MBP until it reaches |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3310 or exceeds P, and return the advanced MBP. The wide characters that |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3311 mbs_to_wchar decodes along the way are discarded. As a shortcut, if |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3312 d->syntax.never_trail[*P] is set, P itself is returned immediately |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3313 without advancing MBP. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3314 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3315 Both P and MBP must be no larger than END. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3316 static unsigned char const * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3317 skip_remains_mb (struct dfa *d, unsigned char const *p, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3318 unsigned char const *mbp, char const *end) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3319 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3320 if (d->syntax.never_trail[*p]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3321 return p; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3322 while (mbp < p) |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3323 { |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3324 wint_t wc; |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3325 mbp += mbs_to_wchar (&wc, (char const *) mbp, |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3326 end - (char const *) mbp, d); |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3327 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3328 return mbp; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3329 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3330 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3331 /* Search through a buffer looking for a match to the struct dfa *D. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3332 Find the first occurrence of a string matching the regexp in the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3333 buffer, and the shortest possible version thereof. Return a pointer to |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3334 the first character after the match, or NULL if none is found. BEGIN |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3335 points to the beginning of the buffer, and END points to the first byte |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3336 after its end. Note however that we store a sentinel byte (usually |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3337 newline) in *END, so the actual buffer must be one byte longer. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3338 When ALLOW_NL, newlines may appear in the matching string. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3339 If COUNT is non-NULL, increment *COUNT once for each newline processed. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3340 If MULTIBYTE, the input consists of multibyte characters and/or |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3341 encoding-error bytes. Otherwise, it consists of single-byte characters. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3342 Here is the list of features that make this DFA matcher punt: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3343 - [M-N] range in non-simple locale: regex is up to 25% faster on [a-z] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3344 - [^...] in non-simple locale |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3345 - [[=foo=]] or [[.foo.]] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3346 - [[:alpha:]] etc. in multibyte locale (except [[:digit:]] works OK) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3347 - back-reference: (.)\1 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3348 - word-delimiter in multibyte locale: \<, \>, \b, \B |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3349 See using_simple_locale for the definition of "simple locale". */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3350 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3351 static inline char * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3352 dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3353 size_t *count, bool multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3354 { |
18444
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3355 if (MAX_TRCOUNT <= d->sindex) |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3356 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3357 for (state_num s = d->min_trcount; s < d->sindex; s++) |
18444
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3358 { |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3359 free (d->states[s].elems.elems); |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3360 free (d->states[s].mbps.elems); |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3361 } |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3362 d->sindex = d->min_trcount; |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3363 |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3364 if (d->trans) |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3365 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3366 for (state_num s = 0; s < d->tralloc; s++) |
18444
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3367 { |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3368 free (d->trans[s]); |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3369 free (d->fails[s]); |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3370 d->trans[s] = d->fails[s] = NULL; |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3371 } |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3372 d->trcount = 0; |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3373 } |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3374 |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3375 if (d->localeinfo.multibyte && d->mb_trans) |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3376 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3377 for (state_num s = -1; s < d->tralloc; s++) |
18444
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3378 { |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3379 free (d->mb_trans[s]); |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3380 d->mb_trans[s] = NULL; |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3381 } |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3382 for (state_num s = 0; s < d->min_trcount; s++) |
18444
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3383 d->states[s].mb_trindex = -1; |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3384 d->mb_trcount = 0; |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3385 } |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3386 } |
0f7103b2baf0
dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18435
diff
changeset
|
3387 |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3388 if (!d->tralloc) |
18659
161f38194efe
dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18658
diff
changeset
|
3389 realloc_trans_if_necessary (d); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3390 |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3391 /* Current state. */ |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3392 state_num s = 0, s1 = 0; |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3393 |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3394 /* Current input character. */ |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3395 unsigned char const *p = (unsigned char const *) begin; |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3396 unsigned char const *mbp = p; |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3397 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3398 /* Copy of d->trans so it can be optimized into a register. */ |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3399 state_num **trans = d->trans; |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3400 unsigned char eol = d->syntax.eolbyte; /* Likewise for eolbyte. */ |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3401 unsigned char saved_end = *(unsigned char *) end; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3402 *end = eol; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3403 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3404 if (multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3405 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3406 memset (&d->mbs, 0, sizeof d->mbs); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3407 if (d->mb_follows.alloc == 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3408 alloc_position_set (&d->mb_follows, d->nleaves); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3409 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3410 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3411 size_t nlcount = 0; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3412 for (;;) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3413 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3414 state_num *t; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3415 while ((t = trans[s]) != NULL) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3416 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3417 if (s < d->min_trcount) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3418 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3419 if (!multibyte || d->states[s].mbps.nelem == 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3420 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3421 while (t[*p] == s) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3422 p++; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3423 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3424 if (multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3425 p = mbp = skip_remains_mb (d, p, mbp, end); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3426 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3427 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3428 if (multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3429 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3430 s1 = s; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3431 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3432 if (d->states[s].mbps.nelem == 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3433 || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3434 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3435 /* If an input character does not match ANYCHAR, process it |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3436 like a single-byte character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3437 s = t[*p++]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3438 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3439 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3440 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3441 s = transit_state (d, s, &p, (unsigned char *) end); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3442 mbp = p; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3443 trans = d->trans; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3444 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3445 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3446 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3447 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3448 s1 = t[*p++]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3449 t = trans[s1]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3450 if (! t) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3451 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3452 state_num tmp = s; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3453 s = s1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3454 s1 = tmp; /* swap */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3455 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3456 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3457 if (s < d->min_trcount) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3458 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3459 while (t[*p] == s1) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3460 p++; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3461 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3462 s = t[*p++]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3463 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3464 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3465 |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3466 if (s < 0) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3467 { |
18524
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3468 if (s == -2) |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3469 { |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3470 s = build_state (s1, d, p[-1]); |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3471 trans = d->trans; |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3472 } |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3473 else if ((char *) p <= end && p[-1] == eol && 0 <= d->newlines[s1]) |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3474 { |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3475 /* The previous character was a newline. Count it, and skip |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3476 checking of multibyte character boundaries up to here. */ |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3477 nlcount++; |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3478 mbp = p; |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3479 |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3480 s = (allow_nl ? d->newlines[s1] |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3481 : d->syntax.sbit[eol] == CTX_NEWLINE ? 0 |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3482 : d->syntax.sbit[eol] == CTX_LETTER ? d->min_trcount - 1 |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3483 : d->initstate_notbol); |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3484 } |
06c71a5ec1e9
dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents:
18523
diff
changeset
|
3485 else |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3486 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3487 p = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3488 goto done; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3489 } |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3490 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3491 else if (d->fails[s]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3492 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3493 if ((d->success[s] & d->syntax.sbit[*p]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3494 || ((char *) p == end |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
3495 && accepts_in_context (d->states[s].context, CTX_NEWLINE, s, |
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
3496 d))) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3497 goto done; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3498 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3499 if (multibyte && s < d->min_trcount) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3500 p = mbp = skip_remains_mb (d, p, mbp, end); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3501 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3502 s1 = s; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3503 if (!multibyte || d->states[s].mbps.nelem == 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3504 || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3505 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3506 /* If an input character does not match ANYCHAR, process it |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3507 like a single-byte character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3508 s = d->fails[s][*p++]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3509 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3510 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3511 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3512 s = transit_state (d, s, &p, (unsigned char *) end); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3513 mbp = p; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3514 trans = d->trans; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3515 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3516 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3517 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3518 { |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3519 build_state (s, d, p[0]); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3520 trans = d->trans; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3521 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3522 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3523 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3524 done: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3525 if (count) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3526 *count += nlcount; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3527 *end = saved_end; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3528 return (char *) p; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3529 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3530 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3531 /* Specialized versions of dfaexec for multibyte and single-byte cases. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3532 This is for performance, as dfaexec_main is an inline function. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3533 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3534 static char * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3535 dfaexec_mb (struct dfa *d, char const *begin, char *end, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3536 bool allow_nl, size_t *count, bool *backref) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3537 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3538 return dfaexec_main (d, begin, end, allow_nl, count, true); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3539 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3540 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3541 static char * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3542 dfaexec_sb (struct dfa *d, char const *begin, char *end, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3543 bool allow_nl, size_t *count, bool *backref) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3544 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3545 return dfaexec_main (d, begin, end, allow_nl, count, false); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3546 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3547 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3548 /* Always set *BACKREF and return BEGIN. Use this wrapper for |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3549 any regexp that uses a construct not supported by this code. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3550 static char * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3551 dfaexec_noop (struct dfa *d, char const *begin, char *end, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3552 bool allow_nl, size_t *count, bool *backref) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3553 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3554 *backref = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3555 return (char *) begin; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3556 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3557 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3558 /* Like dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, D->localeinfo.multibyte), |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3559 but faster and set *BACKREF if the DFA code does not support this |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3560 regexp usage. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3561 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3562 char * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3563 dfaexec (struct dfa *d, char const *begin, char *end, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3564 bool allow_nl, size_t *count, bool *backref) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3565 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3566 return d->dfaexec (d, begin, end, allow_nl, count, backref); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3567 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3568 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3569 struct dfa * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3570 dfasuperset (struct dfa const *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3571 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3572 return d->superset; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3573 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3574 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3575 bool |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3576 dfaisfast (struct dfa const *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3577 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3578 return d->fast; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3579 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3580 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3581 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3582 free_mbdata (struct dfa *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3583 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3584 free (d->multibyte_prop); |
18620
1c30554fd1dc
dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents:
18619
diff
changeset
|
3585 free (d->lex.brack.chars); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3586 free (d->mb_follows.elems); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3587 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3588 if (d->mb_trans) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3589 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3590 state_num s; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3591 for (s = -1; s < d->tralloc; s++) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3592 free (d->mb_trans[s]); |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3593 free (d->mb_trans - 2); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3594 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3595 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3596 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3597 /* Return true if every construct in D is supported by this DFA matcher. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3598 static bool _GL_ATTRIBUTE_PURE |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3599 dfa_supported (struct dfa const *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3600 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3601 for (size_t i = 0; i < d->tindex; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3602 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3603 switch (d->tokens[i]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3604 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3605 case BEGWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3606 case ENDWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3607 case LIMWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3608 case NOTLIMWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3609 if (!d->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3610 continue; |
18914
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
3611 FALLTHROUGH; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3612 case BACKREF: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3613 case MBCSET: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3614 return false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3615 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3616 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3617 return true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3618 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3619 |
39861
5d7b30167723
dfa: trivial comment fix: s/is/if/
Jim Meyering <meyering@fb.com>
parents:
39860
diff
changeset
|
3620 /* Disable use of the superset DFA if it is not likely to help |
39860
fd9996b911ad
dfa: use more-informative function name
Paul Eggert <eggert@cs.ucla.edu>
parents:
39859
diff
changeset
|
3621 performance. */ |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3622 static void |
39860
fd9996b911ad
dfa: use more-informative function name
Paul Eggert <eggert@cs.ucla.edu>
parents:
39859
diff
changeset
|
3623 maybe_disable_superset_dfa (struct dfa *d) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3624 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3625 if (!d->localeinfo.using_utf8) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3626 return; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3627 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3628 bool have_backref = false; |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3629 for (size_t i = 0; i < d->tindex; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3630 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3631 switch (d->tokens[i]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3632 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3633 case ANYCHAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3634 /* Lowered. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3635 abort (); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3636 case BACKREF: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3637 have_backref = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3638 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3639 case MBCSET: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3640 /* Requires multi-byte algorithm. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3641 return; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3642 default: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3643 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3644 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3645 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3646 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3647 if (!have_backref && d->superset) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3648 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3649 /* The superset DFA is not likely to be much faster, so remove it. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3650 dfafree (d->superset); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3651 free (d->superset); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3652 d->superset = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3653 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3654 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3655 free_mbdata (d); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3656 d->localeinfo.multibyte = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3657 d->dfaexec = dfaexec_sb; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3658 d->fast = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3659 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3660 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3661 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3662 dfassbuild (struct dfa *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3663 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3664 struct dfa *sup = dfaalloc (); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3665 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3666 *sup = *d; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3667 sup->localeinfo.multibyte = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3668 sup->dfaexec = dfaexec_sb; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3669 sup->multibyte_prop = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3670 sup->superset = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3671 sup->states = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3672 sup->sindex = 0; |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3673 sup->constraints = NULL; |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3674 sup->separates = NULL; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3675 sup->follows = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3676 sup->tralloc = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3677 sup->trans = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3678 sup->fails = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3679 sup->success = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3680 sup->newlines = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3681 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3682 sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3683 if (d->cindex) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3684 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3685 memcpy (sup->charclasses, d->charclasses, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3686 d->cindex * sizeof *sup->charclasses); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3687 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3688 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3689 sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3690 sup->talloc = d->tindex * 2; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3691 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3692 bool have_achar = false; |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3693 bool have_nchar = false; |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3694 size_t j; |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3695 for (size_t i = j = 0; i < d->tindex; i++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3696 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3697 switch (d->tokens[i]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3698 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3699 case ANYCHAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3700 case MBCSET: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3701 case BACKREF: |
18525
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
3702 { |
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
3703 charclass ccl; |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3704 fillset (&ccl); |
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
3705 sup->tokens[j++] = CSET + charclass_index (sup, &ccl); |
18525
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
3706 sup->tokens[j++] = STAR; |
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
3707 if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR |
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
3708 || d->tokens[i + 1] == PLUS) |
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
3709 i++; |
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
3710 have_achar = true; |
1545248f9c57
dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents:
18524
diff
changeset
|
3711 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3712 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3713 case BEGWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3714 case ENDWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3715 case LIMWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3716 case NOTLIMWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3717 if (d->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3718 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3719 /* These constraints aren't supported in a multibyte locale. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3720 Ignore them in the superset DFA. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3721 sup->tokens[j++] = EMPTY; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3722 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3723 } |
18914
886945d1fa95
manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents:
18752
diff
changeset
|
3724 FALLTHROUGH; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3725 default: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3726 sup->tokens[j++] = d->tokens[i]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3727 if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3728 || d->tokens[i] >= CSET) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3729 have_nchar = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3730 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3731 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3732 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3733 sup->tindex = j; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3734 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3735 if (have_nchar && (have_achar || d->localeinfo.multibyte)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3736 d->superset = sup; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3737 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3738 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3739 dfafree (sup); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3740 free (sup); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3741 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3742 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3743 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3744 /* Parse and analyze a single string of the given length. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3745 void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3746 dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3747 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3748 dfaparse (s, len, d); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3749 dfassbuild (d); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3750 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3751 if (dfa_supported (d)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3752 { |
39860
fd9996b911ad
dfa: use more-informative function name
Paul Eggert <eggert@cs.ucla.edu>
parents:
39859
diff
changeset
|
3753 maybe_disable_superset_dfa (d); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3754 dfaanalyze (d, searchflag); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3755 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3756 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3757 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3758 d->dfaexec = dfaexec_noop; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3759 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3760 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3761 if (d->superset) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3762 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3763 d->fast = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3764 dfaanalyze (d->superset, searchflag); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3765 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3766 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3767 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3768 /* Free the storage held by the components of a dfa. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3769 void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3770 dfafree (struct dfa *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3771 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3772 free (d->charclasses); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3773 free (d->tokens); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3774 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3775 if (d->localeinfo.multibyte) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3776 free_mbdata (d); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3777 |
39956
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3778 free (d->constraints); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3779 free (d->separates); |
4fee19f467e5
dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
39955
diff
changeset
|
3780 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3781 for (size_t i = 0; i < d->sindex; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3782 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3783 free (d->states[i].elems.elems); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3784 free (d->states[i].mbps.elems); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3785 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3786 free (d->states); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3787 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3788 if (d->follows) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3789 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3790 for (size_t i = 0; i < d->tindex; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3791 free (d->follows[i].elems); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3792 free (d->follows); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3793 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3794 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3795 if (d->trans) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3796 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3797 for (size_t i = 0; i < d->tralloc; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3798 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3799 free (d->trans[i]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3800 free (d->fails[i]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3801 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3802 |
18523
503cb4e4af32
dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents:
18519
diff
changeset
|
3803 free (d->trans - 2); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3804 free (d->fails); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3805 free (d->newlines); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3806 free (d->success); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3807 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3808 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3809 if (d->superset) |
39760
9e30fb88528f
dfa: fix memory leak
Assaf Gordon <assafgordon@gmail.com>
parents:
39722
diff
changeset
|
3810 { |
9e30fb88528f
dfa: fix memory leak
Assaf Gordon <assafgordon@gmail.com>
parents:
39722
diff
changeset
|
3811 dfafree (d->superset); |
9e30fb88528f
dfa: fix memory leak
Assaf Gordon <assafgordon@gmail.com>
parents:
39722
diff
changeset
|
3812 free (d->superset); |
9e30fb88528f
dfa: fix memory leak
Assaf Gordon <assafgordon@gmail.com>
parents:
39722
diff
changeset
|
3813 } |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3814 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3815 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3816 /* Having found the postfix representation of the regular expression, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3817 try to find a long sequence of characters that must appear in any line |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3818 containing the r.e. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3819 Finding a "longest" sequence is beyond the scope here; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3820 we take an easy way out and hope for the best. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3821 (Take "(ab|a)b"--please.) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3822 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3823 We do a bottom-up calculation of sequences of characters that must appear |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3824 in matches of r.e.'s represented by trees rooted at the nodes of the postfix |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3825 representation: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3826 sequences that must appear at the left of the match ("left") |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3827 sequences that must appear at the right of the match ("right") |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3828 lists of sequences that must appear somewhere in the match ("in") |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3829 sequences that must constitute the match ("is") |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3830 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3831 When we get to the root of the tree, we use one of the longest of its |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3832 calculated "in" sequences as our answer. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3833 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3834 The sequences calculated for the various types of node (in pseudo ANSI c) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3835 are shown below. "p" is the operand of unary operators (and the left-hand |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3836 operand of binary operators); "q" is the right-hand operand of binary |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3837 operators. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3838 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3839 "ZERO" means "a zero-length sequence" below. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3840 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3841 Type left right is in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3842 ---- ---- ----- -- -- |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3843 char c # c # c # c # c |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3844 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3845 ANYCHAR ZERO ZERO ZERO ZERO |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3846 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3847 MBCSET ZERO ZERO ZERO ZERO |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3848 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3849 CSET ZERO ZERO ZERO ZERO |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3850 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3851 STAR ZERO ZERO ZERO ZERO |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3852 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3853 QMARK ZERO ZERO ZERO ZERO |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3854 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3855 PLUS p->left p->right ZERO p->in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3856 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3857 CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3858 p->left : q->right : q->is!=ZERO) ? q->in plus |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3859 p->is##q->left p->right##q->is p->is##q->is : p->right##q->left |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3860 ZERO |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3861 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3862 OR longest common longest common (do p->is and substrings common |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3863 leading trailing to q->is have same p->in and |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3864 (sub)sequence (sub)sequence q->in length and content) ? |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3865 of p->left of p->right |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3866 and q->left and q->right p->is : NULL |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3867 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3868 If there's anything else we recognize in the tree, all four sequences get set |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3869 to zero-length sequences. If there's something we don't recognize in the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3870 tree, we just return a zero-length sequence. |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3871 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3872 Break ties in favor of infrequent letters (choosing 'zzz' in preference to |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3873 'aaa')? |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3874 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3875 And ... is it here or someplace that we might ponder "optimizations" such as |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3876 egrep 'psi|epsilon' -> egrep 'psi' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3877 egrep 'pepsi|epsilon' -> egrep 'epsi' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3878 (Yes, we now find "epsi" as a "string |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3879 that must occur", but we might also |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3880 simplify the *entire* r.e. being sought) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3881 grep '[c]' -> grep 'c' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3882 grep '(ab|a)b' -> grep 'ab' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3883 grep 'ab*' -> grep 'a' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3884 grep 'a*b' -> grep 'b' |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3885 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3886 There are several issues: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3887 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3888 Is optimization easy (enough)? |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3889 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3890 Does optimization actually accomplish anything, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3891 or is the automaton you get from "psi|epsilon" (for example) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3892 the same as the one you get from "psi" (for example)? |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3893 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3894 Are optimizable r.e.'s likely to be used in real-life situations |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3895 (something like 'ab*' is probably unlikely; something like |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3896 'psi|epsilon' is likelier)? */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3897 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3898 static char * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3899 icatalloc (char *old, char const *new) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3900 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3901 size_t newsize = strlen (new); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3902 if (newsize == 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3903 return old; |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3904 size_t oldsize = strlen (old); |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3905 char *result = xrealloc (old, oldsize + newsize + 1); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3906 memcpy (result + oldsize, new, newsize + 1); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3907 return result; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3908 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3909 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3910 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3911 freelist (char **cpp) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3912 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3913 while (*cpp) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3914 free (*cpp++); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3915 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3916 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3917 static char ** |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3918 enlist (char **cpp, char *new, size_t len) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3919 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3920 new = memcpy (xmalloc (len + 1), new, len); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3921 new[len] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3922 /* Is NEW already in the list, either verbatim or inside a longer entry? */ |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3923 size_t i; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3924 for (i = 0; cpp[i] != NULL; ++i) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3925 if (strstr (cpp[i], new) != NULL) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3926 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3927 free (new); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3928 return cpp; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3929 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3930 /* Eliminate any obsoleted strings. */ |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3931 for (size_t j = 0; cpp[j] != NULL; ) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3932 if (strstr (new, cpp[j]) == NULL) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3933 ++j; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3934 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3935 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3936 free (cpp[j]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3937 if (--i == j) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3938 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3939 cpp[j] = cpp[i]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3940 cpp[i] = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3941 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3942 /* Add the new string. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3943 cpp = xnrealloc (cpp, i + 2, sizeof *cpp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3944 cpp[i] = new; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3945 cpp[i + 1] = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3946 return cpp; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3947 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3948 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3949 /* Given pointers to two strings, return a pointer to an allocated |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3950 list of their distinct common substrings. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3951 static char ** |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3952 comsubs (char *left, char const *right) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3953 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3954 char **cpp = xzalloc (sizeof *cpp); |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3955 |
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
3956 for (char *lcp = left; *lcp != '\0'; lcp++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3957 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3958 size_t len = 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3959 char *rcp = strchr (right, *lcp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3960 while (rcp != NULL) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3961 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3962 size_t i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3963 for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3964 continue; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3965 if (i > len) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3966 len = i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3967 rcp = strchr (rcp + 1, *lcp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3968 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3969 if (len != 0) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3970 cpp = enlist (cpp, lcp, len); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3971 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3972 return cpp; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3973 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3974 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3975 static char ** |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3976 addlists (char **old, char **new) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3977 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3978 for (; *new; new++) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3979 old = enlist (old, *new, strlen (*new)); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3980 return old; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3981 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3982 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3983 /* Given two lists of substrings, return a new list giving substrings |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3984 common to both. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3985 static char ** |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3986 inboth (char **left, char **right) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3987 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3988 char **both = xzalloc (sizeof *both); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3989 |
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3990 for (size_t lnum = 0; left[lnum] != NULL; ++lnum) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3991 { |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
3992 for (size_t rnum = 0; right[rnum] != NULL; ++rnum) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3993 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3994 char **temp = comsubs (left[lnum], right[rnum]); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3995 both = addlists (both, temp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3996 freelist (temp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3997 free (temp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3998 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
3999 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4000 return both; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4001 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4002 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4003 typedef struct must must; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4004 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4005 struct must |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4006 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4007 char **in; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4008 char *left; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4009 char *right; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4010 char *is; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4011 bool begline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4012 bool endline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4013 must *prev; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4014 }; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4015 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4016 static must * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4017 allocmust (must *mp, size_t size) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4018 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4019 must *new_mp = xmalloc (sizeof *new_mp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4020 new_mp->in = xzalloc (sizeof *new_mp->in); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4021 new_mp->left = xzalloc (size); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4022 new_mp->right = xzalloc (size); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4023 new_mp->is = xzalloc (size); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4024 new_mp->begline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4025 new_mp->endline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4026 new_mp->prev = mp; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4027 return new_mp; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4028 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4029 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4030 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4031 resetmust (must *mp) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4032 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4033 freelist (mp->in); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4034 mp->in[0] = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4035 mp->left[0] = mp->right[0] = mp->is[0] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4036 mp->begline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4037 mp->endline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4038 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4039 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4040 static void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4041 freemust (must *mp) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4042 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4043 freelist (mp->in); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4044 free (mp->in); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4045 free (mp->left); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4046 free (mp->right); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4047 free (mp->is); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4048 free (mp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4049 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4050 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4051 struct dfamust * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4052 dfamust (struct dfa const *d) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4053 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4054 must *mp = NULL; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4055 char const *result = ""; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4056 bool exact = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4057 bool begline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4058 bool endline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4059 bool need_begline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4060 bool need_endline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4061 bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4062 |
39859
1f2a63e46815
dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents:
39858
diff
changeset
|
4063 for (size_t ri = 1; ri + 1 < d->tindex; ri++) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4064 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4065 token t = d->tokens[ri]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4066 switch (t) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4067 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4068 case BEGLINE: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4069 mp = allocmust (mp, 2); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4070 mp->begline = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4071 need_begline = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4072 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4073 case ENDLINE: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4074 mp = allocmust (mp, 2); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4075 mp->endline = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4076 need_endline = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4077 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4078 case LPAREN: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4079 case RPAREN: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4080 assert (!"neither LPAREN nor RPAREN may appear here"); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4081 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4082 case EMPTY: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4083 case BEGWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4084 case ENDWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4085 case LIMWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4086 case NOTLIMWORD: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4087 case BACKREF: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4088 case ANYCHAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4089 case MBCSET: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4090 mp = allocmust (mp, 2); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4091 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4092 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4093 case STAR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4094 case QMARK: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4095 resetmust (mp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4096 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4097 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4098 case OR: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4099 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4100 char **new; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4101 must *rmp = mp; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4102 must *lmp = mp = mp->prev; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4103 size_t j, ln, rn, n; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4104 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4105 /* Guaranteed to be. Unlikely, but ... */ |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
4106 if (streq (lmp->is, rmp->is)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4107 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4108 lmp->begline &= rmp->begline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4109 lmp->endline &= rmp->endline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4110 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4111 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4112 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4113 lmp->is[0] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4114 lmp->begline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4115 lmp->endline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4116 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4117 /* Left side--easy */ |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
4118 size_t i = 0; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4119 while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4120 ++i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4121 lmp->left[i] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4122 /* Right side */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4123 ln = strlen (lmp->right); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4124 rn = strlen (rmp->right); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4125 n = ln; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4126 if (n > rn) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4127 n = rn; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4128 for (i = 0; i < n; ++i) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4129 if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4130 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4131 for (j = 0; j < i; ++j) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4132 lmp->right[j] = lmp->right[(ln - i) + j]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4133 lmp->right[j] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4134 new = inboth (lmp->in, rmp->in); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4135 freelist (lmp->in); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4136 free (lmp->in); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4137 lmp->in = new; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4138 freemust (rmp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4139 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4140 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4141 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4142 case PLUS: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4143 mp->is[0] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4144 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4145 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4146 case END: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4147 assert (!mp->prev); |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
4148 for (size_t i = 0; mp->in[i] != NULL; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4149 if (strlen (mp->in[i]) > strlen (result)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4150 result = mp->in[i]; |
18630
3974d9d184ab
dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents:
18629
diff
changeset
|
4151 if (streq (result, mp->is)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4152 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4153 if ((!need_begline || mp->begline) && (!need_endline |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4154 || mp->endline)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4155 exact = true; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4156 begline = mp->begline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4157 endline = mp->endline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4158 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4159 goto done; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4160 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4161 case CAT: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4162 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4163 must *rmp = mp; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4164 must *lmp = mp = mp->prev; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4165 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4166 /* In. Everything in left, plus everything in |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4167 right, plus concatenation of |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4168 left's right and right's left. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4169 lmp->in = addlists (lmp->in, rmp->in); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4170 if (lmp->right[0] != '\0' && rmp->left[0] != '\0') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4171 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4172 size_t lrlen = strlen (lmp->right); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4173 size_t rllen = strlen (rmp->left); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4174 char *tp = xmalloc (lrlen + rllen); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4175 memcpy (tp, lmp->right, lrlen); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4176 memcpy (tp + lrlen, rmp->left, rllen); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4177 lmp->in = enlist (lmp->in, tp, lrlen + rllen); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4178 free (tp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4179 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4180 /* Left-hand */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4181 if (lmp->is[0] != '\0') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4182 lmp->left = icatalloc (lmp->left, rmp->left); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4183 /* Right-hand */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4184 if (rmp->is[0] == '\0') |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4185 lmp->right[0] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4186 lmp->right = icatalloc (lmp->right, rmp->right); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4187 /* Guaranteed to be */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4188 if ((lmp->is[0] != '\0' || lmp->begline) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4189 && (rmp->is[0] != '\0' || rmp->endline)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4190 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4191 lmp->is = icatalloc (lmp->is, rmp->is); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4192 lmp->endline = rmp->endline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4193 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4194 else |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4195 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4196 lmp->is[0] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4197 lmp->begline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4198 lmp->endline = false; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4199 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4200 freemust (rmp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4201 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4202 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4203 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4204 case '\0': |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4205 /* Not on *my* shift. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4206 goto done; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4207 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4208 default: |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4209 if (CSET <= t) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4210 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4211 /* If T is a singleton, or if case-folding in a unibyte |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4212 locale and T's members all case-fold to the same char, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4213 convert T to one of its members. Otherwise, do |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4214 nothing further with T. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4215 charclass *ccl = &d->charclasses[t - CSET]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4216 int j; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4217 for (j = 0; j < NOTCHAR; j++) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
4218 if (tstbit (j, ccl)) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4219 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4220 if (! (j < NOTCHAR)) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4221 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4222 mp = allocmust (mp, 2); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4223 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4224 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4225 t = j; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4226 while (++j < NOTCHAR) |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
4227 if (tstbit (j, ccl) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4228 && ! (case_fold_unibyte |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4229 && toupper (j) == toupper (t))) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4230 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4231 if (j < NOTCHAR) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4232 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4233 mp = allocmust (mp, 2); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4234 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4235 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4236 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4237 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
4238 size_t rj = ri + 2; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4239 if (d->tokens[ri + 1] == CAT) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4240 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4241 for (; rj < d->tindex - 1; rj += 2) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4242 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4243 if ((rj != ri && (d->tokens[rj] <= 0 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4244 || NOTCHAR <= d->tokens[rj])) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4245 || d->tokens[rj + 1] != CAT) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4246 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4247 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4248 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4249 mp = allocmust (mp, ((rj - ri) >> 1) + 1); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4250 mp->is[0] = mp->left[0] = mp->right[0] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4251 = case_fold_unibyte ? toupper (t) : t; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4252 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
4253 size_t i; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4254 for (i = 1; ri + 2 < rj; i++) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4255 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4256 ri += 2; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4257 t = d->tokens[ri]; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4258 mp->is[i] = mp->left[i] = mp->right[i] |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4259 = case_fold_unibyte ? toupper (t) : t; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4260 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4261 mp->is[i] = mp->left[i] = mp->right[i] = '\0'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4262 mp->in = enlist (mp->in, mp->is, i); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4263 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4264 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4265 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4266 done:; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4267 |
18629
32aa1933afb8
dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents:
18628
diff
changeset
|
4268 struct dfamust *dm = NULL; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4269 if (*result) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4270 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4271 dm = xmalloc (sizeof *dm); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4272 dm->exact = exact; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4273 dm->begline = begline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4274 dm->endline = endline; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4275 dm->must = xstrdup (result); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4276 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4277 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4278 while (mp) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4279 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4280 must *prev = mp->prev; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4281 freemust (mp); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4282 mp = prev; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4283 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4284 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4285 return dm; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4286 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4287 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4288 void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4289 dfamustfree (struct dfamust *dm) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4290 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4291 free (dm->must); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4292 free (dm); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4293 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4294 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4295 struct dfa * |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4296 dfaalloc (void) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4297 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4298 return xmalloc (sizeof (struct dfa)); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4299 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4300 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4301 /* Initialize DFA. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4302 void |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4303 dfasyntax (struct dfa *dfa, struct localeinfo const *linfo, |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4304 reg_syntax_t bits, int dfaopts) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4305 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4306 memset (dfa, 0, offsetof (struct dfa, dfaexec)); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4307 dfa->dfaexec = linfo->multibyte ? dfaexec_mb : dfaexec_sb; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4308 dfa->simple_locale = using_simple_locale (linfo->multibyte); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4309 dfa->localeinfo = *linfo; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4310 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4311 dfa->fast = !dfa->localeinfo.multibyte; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4312 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4313 dfa->canychar = -1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4314 dfa->lex.cur_mb_len = 1; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4315 dfa->syntax.syntax_bits_set = true; |
18557
34cdc221276c
dfa: remove DFA_CASE_FOLD flag in favor of RE_ICASE
Paul Eggert <eggert@cs.ucla.edu>
parents:
18556
diff
changeset
|
4316 dfa->syntax.case_fold = (bits & RE_ICASE) != 0; |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4317 dfa->syntax.anchor = (dfaopts & DFA_ANCHOR) != 0; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4318 dfa->syntax.eolbyte = dfaopts & DFA_EOL_NUL ? '\0' : '\n'; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4319 dfa->syntax.syntax_bits = bits; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4320 |
18628
dbd0afa797c5
dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents:
18626
diff
changeset
|
4321 for (int i = CHAR_MIN; i <= CHAR_MAX; ++i) |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4322 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4323 unsigned char uc = i; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4324 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4325 dfa->syntax.sbit[uc] = char_context (dfa, uc); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4326 switch (dfa->syntax.sbit[uc]) |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4327 { |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4328 case CTX_LETTER: |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
4329 setbit (uc, &dfa->syntax.letters); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4330 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4331 case CTX_NEWLINE: |
18618
500f7d1fe5a2
dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents:
18608
diff
changeset
|
4332 setbit (uc, &dfa->syntax.newline); |
18410
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4333 break; |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4334 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4335 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4336 /* POSIX requires that the five bytes in "\n\r./" (including the |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4337 terminating NUL) cannot occur inside a multibyte character. */ |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4338 dfa->syntax.never_trail[uc] = (dfa->localeinfo.using_utf8 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4339 ? (uc & 0xc0) != 0x80 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4340 : strchr ("\n\r./", uc) != NULL); |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4341 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4342 } |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4343 |
a8d2b9364721
dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff
changeset
|
4344 /* vim:set shiftwidth=2: */ |