annotate lib/dfa.c @ 40047:183a2f6b0b16

revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7 v0.1-2213-gae4b73e28 caused a regression in grep-3.2 (no match): echo '123-x'|LC_ALL=C grep -E '.\bx' The goal is to revert the first, but reverting it requires to restore the function deleted in the second. I ran this to restore the deleted function: git show v0.1-2281-g95cd86dd7 lib/dfa.c \ | perl -0777 -pe 's/^@@[^\n]*dfaan.*//ms' \ | patch -R -p1 * lib/dfa.c (charclass_context): Restore deleted function. Reverting the primary commit removes this change: dfa: Simplify a building state * lib/dfa.c (build_state): Simplify a building state.
author Jim Meyering <meyering@fb.com>
date Thu, 20 Dec 2018 19:51:48 -0800
parents c51e38088432
children b06060465f09
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1 /* dfa.c - deterministic extended regexp routines for GNU
19484
10eb9086bea0 maint: Run 'make update-copyright'
Paul Eggert <eggert@cs.ucla.edu>
parents: 18931
diff changeset
2 Copyright (C) 1988, 1998, 2000, 2002, 2004-2005, 2007-2018 Free Software
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3 Foundation, Inc.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
5 This program is free software; you can redistribute it and/or modify
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
6 it under the terms of the GNU General Public License as published by
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
7 the Free Software Foundation; either version 3, or (at your option)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
8 any later version.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
9
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
10 This program is distributed in the hope that it will be useful,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
13 GNU General Public License for more details.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
14
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
15 You should have received a copy of the GNU General Public License
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
16 along with this program; if not, write to the Free Software
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
17 Foundation, Inc.,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
18 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
19
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
20 /* Written June, 1988 by Mike Haertel
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
21 Modified July, 1988 by Arthur David Olson to assist BMG speedups */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
22
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
23 #include <config.h>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
24
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
25 #include "dfa.h"
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
26
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
27 #include <assert.h>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
28 #include <ctype.h>
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
29 #include <stdint.h>
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
30 #include <stdio.h>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
31 #include <stdlib.h>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
32 #include <limits.h>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
33 #include <string.h>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
34 #include <locale.h>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
35
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
36 static bool
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
37 streq (char const *a, char const *b)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
38 {
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
39 return strcmp (a, b) == 0;
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
40 }
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
41
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
42 static bool
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
43 isasciidigit (char c)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
44 {
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
45 return '0' <= c && c <= '9';
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
46 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
47
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
48 #include "gettext.h"
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
49 #define _(str) gettext (str)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
50
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
51 #include <wchar.h>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
52
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
53 #include "intprops.h"
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
54 #include "xalloc.h"
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
55 #include "localeinfo.h"
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
56
18914
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
57 #ifndef FALLTHROUGH
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
58 # if __GNUC__ < 7
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
59 # define FALLTHROUGH ((void) 0)
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
60 # else
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
61 # define FALLTHROUGH __attribute__ ((__fallthrough__))
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
62 # endif
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
63 #endif
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
64
18560
ac2082d27eed dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents: 18559
diff changeset
65 #ifndef MIN
ac2082d27eed dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents: 18559
diff changeset
66 # define MIN(a,b) ((a) < (b) ? (a) : (b))
ac2082d27eed dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents: 18559
diff changeset
67 #endif
ac2082d27eed dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents: 18559
diff changeset
68
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
69 /* HPUX defines these as macros in sys/param.h. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
70 #ifdef setbit
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
71 # undef setbit
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
72 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
73 #ifdef clrbit
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
74 # undef clrbit
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
75 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
76
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
77 /* First integer value that is greater than any character code. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
78 enum { NOTCHAR = 1 << CHAR_BIT };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
79
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
80 /* This represents part of a character class. It must be unsigned and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
81 at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
82 typedef unsigned long int charclass_word;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
83
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
84 /* CHARCLASS_WORD_BITS is the number of bits used in a charclass word.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
85 CHARCLASS_PAIR (LO, HI) is part of a charclass initializer, and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
86 represents 64 bits' worth of a charclass, where LO and HI are the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
87 low and high-order 32 bits of the 64-bit quantity. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
88 #if ULONG_MAX >> 31 >> 31 < 3
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
89 enum { CHARCLASS_WORD_BITS = 32 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
90 # define CHARCLASS_PAIR(lo, hi) lo, hi
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
91 #else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
92 enum { CHARCLASS_WORD_BITS = 64 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
93 # define CHARCLASS_PAIR(lo, hi) (((charclass_word) (hi) << 32) + (lo))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
94 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
95
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
96 /* An initializer for a charclass whose 32-bit words are A through H. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
97 #define CHARCLASS_INIT(a, b, c, d, e, f, g, h) \
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
98 {{ \
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
99 CHARCLASS_PAIR (a, b), CHARCLASS_PAIR (c, d), \
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
100 CHARCLASS_PAIR (e, f), CHARCLASS_PAIR (g, h) \
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
101 }}
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
102
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
103 /* The maximum useful value of a charclass_word; all used bits are 1. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
104 static charclass_word const CHARCLASS_WORD_MASK
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
105 = ((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
106
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
107 /* Number of words required to hold a bit for every character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
108 enum
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
109 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
110 CHARCLASS_WORDS = (NOTCHAR + CHARCLASS_WORD_BITS - 1) / CHARCLASS_WORD_BITS
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
111 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
112
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
113 /* Sets of unsigned characters are stored as bit vectors in arrays of charclass_word. */
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
114 typedef struct { charclass_word w[CHARCLASS_WORDS]; } charclass;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
115
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
116 /* Convert a possibly-signed character to an unsigned character. This is
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
117 a bit safer than casting to unsigned char, since it catches some type
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
118 errors that the cast doesn't. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
119 static unsigned char
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
120 to_uchar (char ch)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
121 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
122 return ch;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
123 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
124
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
125 /* Contexts tell us whether a character is a newline or a word constituent.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
126 Word-constituent characters are those that satisfy iswalnum, plus '_'.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
127 Each character has a single CTX_* value; bitmasks of CTX_* values denote
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
128 a particular character class.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
129
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
130 A state also stores a context value, which is a bitmask of CTX_* values.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
131 A state's context represents a set of characters that the state's
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
132 predecessors must match. For example, a state whose context does not
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
133 include CTX_LETTER will never have transitions where the previous
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
134 character is a word constituent. A state whose context is CTX_ANY
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
135 might have transitions from any character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
136
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
137 enum
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
138 {
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
139 CTX_NONE = 1,
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
140 CTX_LETTER = 2,
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
141 CTX_NEWLINE = 4,
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
142 CTX_ANY = 7
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
143 };
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
144
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
145 /* Sometimes characters can only be matched depending on the surrounding
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
146 context. Such context decisions depend on what the previous character
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
147 was, and the value of the current (lookahead) character. Context
18667
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
148 dependent constraints are encoded as 9-bit integers. Each bit that
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
149 is set indicates that the constraint succeeds in the corresponding
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
150 context.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
151
18667
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
152 bit 6-8 - valid contexts when next character is CTX_NEWLINE
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
153 bit 3-5 - valid contexts when next character is CTX_LETTER
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
154 bit 0-2 - valid contexts when next character is CTX_NONE
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
155
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
156 succeeds_in_context determines whether a given constraint
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
157 succeeds in a particular context. Prev is a bitmask of possible
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
158 context values for the previous character, curr is the (single-bit)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
159 context value for the lookahead character. */
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
160 static int
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
161 newline_constraint (int constraint)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
162 {
18667
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
163 return (constraint >> 6) & 7;
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
164 }
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
165 static int
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
166 letter_constraint (int constraint)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
167 {
18667
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
168 return (constraint >> 3) & 7;
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
169 }
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
170 static int
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
171 other_constraint (int constraint)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
172 {
18667
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
173 return constraint & 7;
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
174 }
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
175
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
176 static bool
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
177 succeeds_in_context (int constraint, int prev, int curr)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
178 {
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
179 return !! (((curr & CTX_NONE ? other_constraint (constraint) : 0) \
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
180 | (curr & CTX_LETTER ? letter_constraint (constraint) : 0) \
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
181 | (curr & CTX_NEWLINE ? newline_constraint (constraint) : 0)) \
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
182 & prev);
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
183 }
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
184
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
185 /* The following describe what a constraint depends on. */
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
186 static bool
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
187 prev_newline_dependent (int constraint)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
188 {
18667
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
189 return ((constraint ^ constraint >> 2) & 0111) != 0;
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
190 }
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
191 static bool
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
192 prev_letter_dependent (int constraint)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
193 {
18667
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
194 return ((constraint ^ constraint >> 1) & 0111) != 0;
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
195 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
196
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
197 /* Tokens that match the empty string subject to some constraint actually
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
198 work by applying that constraint to determine what may follow them,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
199 taking into account what has gone before. The following values are
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
200 the constraints corresponding to the special tokens previously defined. */
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
201 enum
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
202 {
18667
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
203 NO_CONSTRAINT = 0777,
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
204 BEGLINE_CONSTRAINT = 0444,
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
205 ENDLINE_CONSTRAINT = 0700,
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
206 BEGWORD_CONSTRAINT = 0050,
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
207 ENDWORD_CONSTRAINT = 0202,
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
208 LIMWORD_CONSTRAINT = 0252,
c83459d710c4 dfa: shrink constraints from 4 bits to 3
Paul Eggert <eggert@cs.ucla.edu>
parents: 18666
diff changeset
209 NOTLIMWORD_CONSTRAINT = 0525
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
210 };
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
211
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
212 /* The regexp is parsed into an array of tokens in postfix form. Some tokens
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
213 are operators and others are terminal symbols. Most (but not all) of these
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
214 codes are returned by the lexical analyzer. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
215
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
216 typedef ptrdiff_t token;
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
217 static ptrdiff_t const TOKEN_MAX = PTRDIFF_MAX;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
218
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
219 /* States are indexed by state_num values. These are normally
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
220 nonnegative but -1 is used as a special value. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
221 typedef ptrdiff_t state_num;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
222
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
223 /* Predefined token values. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
224 enum
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
225 {
39857
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
226 END = -1, /* END is a terminal symbol that matches the
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
227 end of input; any value of END or less in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
228 the parse tree is such a symbol. Accepting
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
229 states of the DFA are those that would have
39857
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
230 a transition on END. This is -1, not some
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
231 more-negative value, to tweak the speed of
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
232 comparisons to END. */
39855
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
233
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
234 /* Ordinary character values are terminal symbols that match themselves. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
235
39857
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
236 /* CSET must come last in the following list of special tokens. Otherwise,
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
237 the list order matters only for performance. Related special tokens
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
238 should have nearby values so that code like (t == ANYCHAR || t == MBCSET
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
239 || CSET <= t) can be done with a single machine-level comparison. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
240
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
241 EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
242 the empty string. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
243
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
244 QMARK, /* QMARK is an operator of one argument that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
245 matches zero or one occurrences of its
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
246 argument. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
247
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
248 STAR, /* STAR is an operator of one argument that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
249 matches the Kleene closure (zero or more
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
250 occurrences) of its argument. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
251
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
252 PLUS, /* PLUS is an operator of one argument that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
253 matches the positive closure (one or more
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
254 occurrences) of its argument. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
255
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
256 REPMN, /* REPMN is a lexical token corresponding
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
257 to the {m,n} construct. REPMN never
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
258 appears in the compiled token vector. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
259
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
260 CAT, /* CAT is an operator of two arguments that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
261 matches the concatenation of its
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
262 arguments. CAT is never returned by the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
263 lexical analyzer. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
264
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
265 OR, /* OR is an operator of two arguments that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
266 matches either of its arguments. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
267
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
268 LPAREN, /* LPAREN never appears in the parse tree,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
269 it is only a lexeme. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
270
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
271 RPAREN, /* RPAREN never appears in the parse tree. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
272
39857
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
273 WCHAR, /* Only returned by lex. wctok contains
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
274 the wide character representation. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
275
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
276 ANYCHAR, /* ANYCHAR is a terminal symbol that matches
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
277 a valid multibyte (or single byte) character.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
278 It is used only if MB_CUR_MAX > 1. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
279
39857
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
280 BEG, /* BEG is an initial symbol that matches the
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
281 beginning of input. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
282
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
283 BEGLINE, /* BEGLINE is a terminal symbol that matches
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
284 the empty string at the beginning of a
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
285 line. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
286
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
287 ENDLINE, /* ENDLINE is a terminal symbol that matches
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
288 the empty string at the end of a line. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
289
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
290 BEGWORD, /* BEGWORD is a terminal symbol that matches
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
291 the empty string at the beginning of a
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
292 word. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
293
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
294 ENDWORD, /* ENDWORD is a terminal symbol that matches
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
295 the empty string at the end of a word. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
296
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
297 LIMWORD, /* LIMWORD is a terminal symbol that matches
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
298 the empty string at the beginning or the
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
299 end of a word. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
300
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
301 NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
302 matches the empty string not at
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
303 the beginning or end of a word. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
304
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
305 BACKREF, /* BACKREF is generated by \<digit>
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
306 or by any other construct that
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
307 is not completely handled. If the scanner
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
308 detects a transition on backref, it returns
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
309 a kind of "semi-success" indicating that
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
310 the match will have to be verified with
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
311 a backtracking matcher. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
312
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
313 MBCSET, /* MBCSET is similar to CSET, but for
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
314 multibyte characters. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
315
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
316 CSET /* CSET (and any value greater) is a
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
317 terminal symbol that matches any of a
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
318 class of characters. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
319 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
320
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
321
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
322 /* States of the recognizer correspond to sets of positions in the parse
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
323 tree, together with the constraints under which they may be matched.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
324 So a position is encoded as an index into the parse tree together with
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
325 a constraint. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
326 typedef struct
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
327 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
328 size_t index; /* Index into the parse array. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
329 unsigned int constraint; /* Constraint for matching this position. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
330 } position;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
331
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
332 /* Sets of positions are stored as arrays. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
333 typedef struct
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
334 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
335 position *elems; /* Elements of this position set. */
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
336 ptrdiff_t nelem; /* Number of elements in this set. */
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
337 ptrdiff_t alloc; /* Number of elements allocated in ELEMS. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
338 } position_set;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
339
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
340 /* A state of the dfa consists of a set of positions, some flags,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
341 and the token value of the lowest-numbered position of the state that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
342 contains an END token. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
343 typedef struct
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
344 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
345 size_t hash; /* Hash of the positions of this state. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
346 position_set elems; /* Positions this state could match. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
347 unsigned char context; /* Context from previous state. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
348 unsigned short constraint; /* Constraint for this state to accept. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
349 token first_end; /* Token value of the first END in elems. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
350 position_set mbps; /* Positions which can match multibyte
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
351 characters or the follows, e.g., period.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
352 Used only if MB_CUR_MAX > 1. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
353 state_num mb_trindex; /* Index of this state in MB_TRANS, or
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
354 negative if the state does not have
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
355 ANYCHAR. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
356 } dfa_state;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
357
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
358 /* Maximum for any transition table count. This should be at least 3,
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
359 for the initial state setup. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
360 enum { MAX_TRCOUNT = 1024 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
361
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
362 /* A bracket operator.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
363 e.g., [a-c], [[:alpha:]], etc. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
364 struct mb_char_classes
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
365 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
366 ptrdiff_t cset;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
367 bool invert;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
368 wchar_t *chars; /* Normal characters. */
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
369 ptrdiff_t nchars;
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
370 ptrdiff_t nchars_alloc;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
371 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
372
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
373 struct regex_syntax
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
374 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
375 /* Syntax bits controlling the behavior of the lexical analyzer. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
376 reg_syntax_t syntax_bits;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
377 bool syntax_bits_set;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
378
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
379 /* Flag for case-folding letters into sets. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
380 bool case_fold;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
381
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
382 /* True if ^ and $ match only the start and end of data, and do not match
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
383 end-of-line within data. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
384 bool anchor;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
385
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
386 /* End-of-line byte in data. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
387 unsigned char eolbyte;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
388
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
389 /* Cache of char-context values. */
18621
fd76a0964db1 dfa: shorten sbit, success
Paul Eggert <eggert@cs.ucla.edu>
parents: 18620
diff changeset
390 char sbit[NOTCHAR];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
391
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
392 /* If never_trail[B], the byte B cannot be a non-initial byte in a
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
393 multibyte character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
394 bool never_trail[NOTCHAR];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
395
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
396 /* Set of characters considered letters. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
397 charclass letters;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
398
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
399 /* Set of characters that are newline. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
400 charclass newline;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
401 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
402
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
403 /* Lexical analyzer. All the dross that deals with the obnoxious
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
404 GNU Regex syntax bits is located here. The poor, suffering
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
405 reader is referred to the GNU Regex documentation for the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
406 meaning of the @#%!@#%^!@ syntax bits. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
407 struct lexer_state
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
408 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
409 char const *ptr; /* Pointer to next input character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
410 size_t left; /* Number of characters remaining. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
411 token lasttok; /* Previous token returned; initially END. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
412 size_t parens; /* Count of outstanding left parens. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
413 int minrep, maxrep; /* Repeat counts for {m,n}. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
414
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
415 /* Wide character representation of the current multibyte character,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
416 or WEOF if there was an encoding error. Used only if
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
417 MB_CUR_MAX > 1. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
418 wint_t wctok;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
419
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
420 /* Length of the multibyte representation of wctok. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
421 int cur_mb_len;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
422
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
423 /* The most recently analyzed multibyte bracket expression. */
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
424 struct mb_char_classes brack;
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
425
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
426 /* We're separated from beginning or (, | only by zero-width characters. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
427 bool laststart;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
428 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
429
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
430 /* Recursive descent parser for regular expressions. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
431
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
432 struct parser_state
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
433 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
434 token tok; /* Lookahead token. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
435 size_t depth; /* Current depth of a hypothetical stack
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
436 holding deferred productions. This is
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
437 used to determine the depth that will be
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
438 required of the real stack later on in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
439 dfaanalyze. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
440 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
441
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
442 /* A compiled regular expression. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
443 struct dfa
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
444 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
445 /* Syntax configuration */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
446 struct regex_syntax syntax;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
447
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
448 /* Fields filled by the scanner. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
449 charclass *charclasses; /* Array of character sets for CSET tokens. */
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
450 ptrdiff_t cindex; /* Index for adding new charclasses. */
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
451 ptrdiff_t calloc; /* Number of charclasses allocated. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
452 size_t canychar; /* Index of anychar class, or (size_t) -1. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
453
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
454 /* Scanner state */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
455 struct lexer_state lex;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
456
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
457 /* Parser state */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
458 struct parser_state parse;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
459
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
460 /* Fields filled by the parser. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
461 token *tokens; /* Postfix parse array. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
462 size_t tindex; /* Index for adding new tokens. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
463 size_t talloc; /* Number of tokens currently allocated. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
464 size_t depth; /* Depth required of an evaluation stack
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
465 used for depth-first traversal of the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
466 parse tree. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
467 size_t nleaves; /* Number of leaves on the parse tree. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
468 size_t nregexps; /* Count of parallel regexps being built
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
469 with dfaparse. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
470 bool fast; /* The DFA is fast. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
471 token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
472 mbstate_t mbs; /* Multibyte conversion state. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
473
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
474 /* The following are valid only if MB_CUR_MAX > 1. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
475
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
476 /* The value of multibyte_prop[i] is defined by following rule.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
477 if tokens[i] < NOTCHAR
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
478 bit 0 : tokens[i] is the first byte of a character, including
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
479 single-byte characters.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
480 bit 1 : tokens[i] is the last byte of a character, including
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
481 single-byte characters.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
482
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
483 e.g.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
484 tokens
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
485 = 'single_byte_a', 'multi_byte_A', single_byte_b'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
486 = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
487 multibyte_prop
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
488 = 3 , 1 , 0 , 2 , 3
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
489 */
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
490 char *multibyte_prop;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
491
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
492 /* Fields filled by the superset. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
493 struct dfa *superset; /* Hint of the dfa. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
494
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
495 /* Fields filled by the state builder. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
496 dfa_state *states; /* States of the dfa. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
497 state_num sindex; /* Index for adding new states. */
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
498 ptrdiff_t salloc; /* Number of states currently allocated. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
499
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
500 /* Fields filled by the parse tree->NFA conversion. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
501 position_set *follows; /* Array of follow sets, indexed by position
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
502 index. The follow of a position is the set
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
503 of positions containing characters that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
504 could conceivably follow a character
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
505 matching the given position in a string
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
506 matching the regexp. Allocated to the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
507 maximum possible position index. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
508 bool searchflag; /* We are supposed to build a searching
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
509 as opposed to an exact matcher. A searching
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
510 matcher finds the first and shortest string
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
511 matching a regexp anywhere in the buffer,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
512 whereas an exact matcher finds the longest
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
513 string matching, but anchored to the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
514 beginning of the buffer. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
515
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
516 /* Fields filled by dfaanalyze. */
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
517 int *constraints; /* Array of union of accepting constraints
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
518 in the follow of a position. */
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
519 int *separates; /* Array of contexts on follow of a
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
520 position. */
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
521
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
522 /* Fields filled by dfaexec. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
523 state_num tralloc; /* Number of transition tables that have
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
524 slots so far, not counting trans[-1] and
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
525 trans[-2]. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
526 int trcount; /* Number of transition tables that have
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
527 been built, other than for initial
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
528 states. */
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
529 int min_trcount; /* Number of initial states. Equivalently,
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
530 the minimum state number for which trcount
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
531 counts transitions. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
532 state_num **trans; /* Transition tables for states that can
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
533 never accept. If the transitions for a
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
534 state have not yet been computed, or the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
535 state could possibly accept, its entry in
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
536 this table is NULL. This points to two
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
537 past the start of the allocated array,
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
538 and trans[-1] and trans[-2] are always
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
539 NULL. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
540 state_num **fails; /* Transition tables after failing to accept
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
541 on a state that potentially could do so.
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
542 If trans[i] is non-null, fails[i] must
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
543 be null. */
18621
fd76a0964db1 dfa: shorten sbit, success
Paul Eggert <eggert@cs.ucla.edu>
parents: 18620
diff changeset
544 char *success; /* Table of acceptance conditions used in
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
545 dfaexec and computed in build_state. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
546 state_num *newlines; /* Transitions on newlines. The entry for a
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
547 newline in any transition table is always
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
548 -1 so we can count lines without wasting
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
549 too many cycles. The transition for a
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
550 newline is stored separately and handled
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
551 as a special case. Newline is also used
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
552 as a sentinel at the end of the buffer. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
553 state_num initstate_notbol; /* Initial state for CTX_LETTER and CTX_NONE
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
554 context in multibyte locales, in which we
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
555 do not distinguish between their contexts,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
556 as not supported word. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
557 position_set mb_follows; /* Follow set added by ANYCHAR on demand. */
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
558 state_num **mb_trans; /* Transition tables for states with
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
559 ANYCHAR. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
560 state_num mb_trcount; /* Number of transition tables for states with
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
561 ANYCHAR that have actually been built. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
562
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
563 /* Information derived from the locale. This is at the end so that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
564 a quick memset need not clear it specially. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
565
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
566 /* dfaexec implementation. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
567 char *(*dfaexec) (struct dfa *, char const *, char *,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
568 bool, size_t *, bool *);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
569
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
570 /* The locale is simple, like the C locale. These locales can be
18752
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
571 processed more efficiently, as they are single-byte, their native
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
572 character set is in collating-sequence order, and they do not
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
573 have multi-character collating elements. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
574 bool simple_locale;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
575
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
576 /* Other cached information derived from the locale. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
577 struct localeinfo localeinfo;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
578 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
579
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
580 /* User access to dfa internals. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
581
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
582 /* S could possibly be an accepting state of R. */
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
583 static bool
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
584 accepting (state_num s, struct dfa const *r)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
585 {
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
586 return r->states[s].constraint != 0;
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
587 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
588
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
589 /* STATE accepts in the specified context. */
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
590 static bool
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
591 accepts_in_context (int prev, int curr, state_num state, struct dfa const *dfa)
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
592 {
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
593 return succeeds_in_context (dfa->states[state].constraint, prev, curr);
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
594 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
595
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
596 static void regexp (struct dfa *dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
597
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
598 /* Store into *PWC the result of converting the leading bytes of the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
599 multibyte buffer S of length N bytes, using D->localeinfo.sbctowc
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
600 and updating the conversion state in *D. On conversion error,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
601 convert just a single byte, to WEOF. Return the number of bytes
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
602 converted.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
603
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
604 This differs from mbrtowc (PWC, S, N, &D->mbs) as follows:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
605
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
606 * PWC points to wint_t, not to wchar_t.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
607 * The last arg is a dfa *D instead of merely a multibyte conversion
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
608 state D->mbs.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
609 * N must be at least 1.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
610 * S[N - 1] must be a sentinel byte.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
611 * Shift encodings are not supported.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
612 * The return value is always in the range 1..N.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
613 * D->mbs is always valid afterwards.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
614 * *PWC is always set to something. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
615 static size_t
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
616 mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
617 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
618 unsigned char uc = s[0];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
619 wint_t wc = d->localeinfo.sbctowc[uc];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
620
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
621 if (wc == WEOF)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
622 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
623 wchar_t wch;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
624 size_t nbytes = mbrtowc (&wch, s, n, &d->mbs);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
625 if (0 < nbytes && nbytes < (size_t) -2)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
626 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
627 *pwc = wch;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
628 return nbytes;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
629 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
630 memset (&d->mbs, 0, sizeof d->mbs);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
631 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
632
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
633 *pwc = wc;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
634 return 1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
635 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
636
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
637 #ifdef DEBUG
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
638
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
639 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
640 prtok (token t)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
641 {
39855
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
642 if (t <= END)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
643 fprintf (stderr, "END");
39855
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
644 else if (0 <= t && t < NOTCHAR)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
645 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
646 unsigned int ch = t;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
647 fprintf (stderr, "0x%02x", ch);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
648 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
649 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
650 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
651 char const *s;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
652 switch (t)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
653 {
39855
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
654 case BEG:
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
655 s = "BEG";
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
656 break;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
657 case EMPTY:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
658 s = "EMPTY";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
659 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
660 case BACKREF:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
661 s = "BACKREF";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
662 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
663 case BEGLINE:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
664 s = "BEGLINE";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
665 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
666 case ENDLINE:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
667 s = "ENDLINE";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
668 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
669 case BEGWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
670 s = "BEGWORD";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
671 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
672 case ENDWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
673 s = "ENDWORD";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
674 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
675 case LIMWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
676 s = "LIMWORD";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
677 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
678 case NOTLIMWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
679 s = "NOTLIMWORD";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
680 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
681 case QMARK:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
682 s = "QMARK";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
683 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
684 case STAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
685 s = "STAR";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
686 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
687 case PLUS:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
688 s = "PLUS";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
689 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
690 case CAT:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
691 s = "CAT";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
692 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
693 case OR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
694 s = "OR";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
695 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
696 case LPAREN:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
697 s = "LPAREN";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
698 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
699 case RPAREN:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
700 s = "RPAREN";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
701 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
702 case ANYCHAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
703 s = "ANYCHAR";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
704 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
705 case MBCSET:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
706 s = "MBCSET";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
707 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
708 default:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
709 s = "CSET";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
710 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
711 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
712 fprintf (stderr, "%s", s);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
713 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
714 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
715 #endif /* DEBUG */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
716
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
717 /* Stuff pertaining to charclasses. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
718
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
719 static bool
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
720 tstbit (unsigned int b, charclass const *c)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
721 {
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
722 return c->w[b / CHARCLASS_WORD_BITS] >> b % CHARCLASS_WORD_BITS & 1;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
723 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
724
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
725 static void
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
726 setbit (unsigned int b, charclass *c)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
727 {
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
728 charclass_word one = 1;
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
729 c->w[b / CHARCLASS_WORD_BITS] |= one << b % CHARCLASS_WORD_BITS;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
730 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
731
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
732 static void
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
733 clrbit (unsigned int b, charclass *c)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
734 {
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
735 charclass_word one = 1;
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
736 c->w[b / CHARCLASS_WORD_BITS] &= ~(one << b % CHARCLASS_WORD_BITS);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
737 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
738
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
739 static void
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
740 zeroset (charclass *s)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
741 {
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
742 memset (s, 0, sizeof *s);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
743 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
744
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
745 static void
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
746 fillset (charclass *s)
18525
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
747 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
748 for (int i = 0; i < CHARCLASS_WORDS; i++)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
749 s->w[i] = CHARCLASS_WORD_MASK;
18525
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
750 }
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
751
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
752 static void
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
753 notset (charclass *s)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
754 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
755 for (int i = 0; i < CHARCLASS_WORDS; ++i)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
756 s->w[i] = CHARCLASS_WORD_MASK & ~s->w[i];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
757 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
758
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
759 static bool
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
760 equal (charclass const *s1, charclass const *s2)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
761 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
762 charclass_word w = 0;
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
763 for (int i = 0; i < CHARCLASS_WORDS; i++)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
764 w |= s1->w[i] ^ s2->w[i];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
765 return w == 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
766 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
767
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
768 static bool
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
769 emptyset (charclass const *s)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
770 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
771 charclass_word w = 0;
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
772 for (int i = 0; i < CHARCLASS_WORDS; i++)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
773 w |= s->w[i];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
774 return w == 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
775 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
776
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
777 /* Grow PA, which points to an array of *NITEMS items, and return the
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
778 location of the reallocated array, updating *NITEMS to reflect its
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
779 new size. The new array will contain at least NITEMS_INCR_MIN more
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
780 items, but will not contain more than NITEMS_MAX items total.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
781 ITEM_SIZE is the size of each item, in bytes.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
782
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
783 ITEM_SIZE and NITEMS_INCR_MIN must be positive. *NITEMS must be
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
784 nonnegative. If NITEMS_MAX is -1, it is treated as if it were
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
785 infinity.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
786
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
787 If PA is null, then allocate a new array instead of reallocating
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
788 the old one.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
789
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
790 Thus, to grow an array A without saving its old contents, do
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
791 { free (A); A = xpalloc (NULL, &AITEMS, ...); }. */
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
792
18560
ac2082d27eed dfa: fix glitches in previous commit
Paul Eggert <eggert@cs.ucla.edu>
parents: 18559
diff changeset
793 static void *
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
794 xpalloc (void *pa, ptrdiff_t *nitems, ptrdiff_t nitems_incr_min,
39722
f7fc45eece35 Continue to use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 19484
diff changeset
795 ptrdiff_t nitems_max, ptrdiff_t item_size)
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
796 {
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
797 ptrdiff_t n0 = *nitems;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
798
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
799 /* The approximate size to use for initial small allocation
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
800 requests. This is the largest "small" request for the GNU C
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
801 library malloc. */
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
802 enum { DEFAULT_MXFAST = 64 * sizeof (size_t) / 4 };
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
803
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
804 /* If the array is tiny, grow it to about (but no greater than)
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
805 DEFAULT_MXFAST bytes. Otherwise, grow it by about 50%.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
806 Adjust the growth according to three constraints: NITEMS_INCR_MIN,
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
807 NITEMS_MAX, and what the C language can represent safely. */
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
808
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
809 ptrdiff_t n, nbytes;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
810 if (INT_ADD_WRAPV (n0, n0 >> 1, &n))
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
811 n = PTRDIFF_MAX;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
812 if (0 <= nitems_max && nitems_max < n)
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
813 n = nitems_max;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
814
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
815 ptrdiff_t adjusted_nbytes
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
816 = ((INT_MULTIPLY_WRAPV (n, item_size, &nbytes) || SIZE_MAX < nbytes)
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
817 ? MIN (PTRDIFF_MAX, SIZE_MAX)
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
818 : nbytes < DEFAULT_MXFAST ? DEFAULT_MXFAST : 0);
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
819 if (adjusted_nbytes)
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
820 {
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
821 n = adjusted_nbytes / item_size;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
822 nbytes = adjusted_nbytes - adjusted_nbytes % item_size;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
823 }
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
824
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
825 if (! pa)
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
826 *nitems = 0;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
827 if (n - n0 < nitems_incr_min
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
828 && (INT_ADD_WRAPV (n0, nitems_incr_min, &n)
39722
f7fc45eece35 Continue to use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 19484
diff changeset
829 || (0 <= nitems_max && nitems_max < n)
f7fc45eece35 Continue to use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 19484
diff changeset
830 || INT_MULTIPLY_WRAPV (n, item_size, &nbytes)))
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
831 xalloc_die ();
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
832 pa = xrealloc (pa, nbytes);
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
833 *nitems = n;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
834 return pa;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
835 }
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
836
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
837 /* Ensure that the array addressed by PA holds at least I + 1 items.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
838 Either return PA, or reallocate the array and return its new address.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
839 Although PA may be null, the returned value is never null.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
840
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
841 The array holds *NITEMS items, where 0 <= I <= *NITEMS; *NITEMS
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
842 is updated on reallocation. If PA is null, *NITEMS must be zero.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
843 Do not allocate more than NITEMS_MAX items total; -1 means no limit.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
844 ITEM_SIZE is the size of one item; it must be positive.
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
845 Avoid O(N**2) behavior on arrays growing linearly. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
846 static void *
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
847 maybe_realloc (void *pa, ptrdiff_t i, ptrdiff_t *nitems,
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
848 ptrdiff_t nitems_max, ptrdiff_t item_size)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
849 {
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
850 if (i < *nitems)
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
851 return pa;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
852 return xpalloc (pa, nitems, 1, nitems_max, item_size);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
853 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
854
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
855 /* In DFA D, find the index of charclass S, or allocate a new one. */
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
856 static ptrdiff_t
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
857 charclass_index (struct dfa *d, charclass *s)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
858 {
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
859 ptrdiff_t i;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
860
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
861 for (i = 0; i < d->cindex; ++i)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
862 if (equal (s, &d->charclasses[i]))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
863 return i;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
864 d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc,
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
865 TOKEN_MAX - CSET, sizeof *d->charclasses);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
866 ++d->cindex;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
867 d->charclasses[i] = *s;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
868 return i;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
869 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
870
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Return true if the single byte C counts as a word constituent for DFA:
   the sbctowc table in DFA->localeinfo must not map C to WEOF, and C
   must be alphanumeric (per isalnum) or an underscore.  */
871 static bool
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
872 unibyte_word_constituent (struct dfa const *dfa, unsigned char c)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
873 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
874 return dfa->localeinfo.sbctowc[c] != WEOF && (isalnum (c) || (c) == '_');
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
875 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
876
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Classify the byte C into a context value for DFA.  Return CTX_NEWLINE
   if C equals DFA->syntax.eolbyte and DFA->syntax.anchor is not set,
   CTX_LETTER if unibyte_word_constituent accepts C, and CTX_NONE
   otherwise.  */
877 static int
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
878 char_context (struct dfa const *dfa, unsigned char c)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
879 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
880 if (c == dfa->syntax.eolbyte && !dfa->syntax.anchor)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
881 return CTX_NEWLINE;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
882 if (unibyte_word_constituent (dfa, c))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
883 return CTX_LETTER;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
884 return CTX_NONE;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
885 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
886
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
887 /* Set a bit in the charclass for the given wchar_t. Do nothing if WC
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
888 is represented by a multi-byte sequence. Even for MB_CUR_MAX == 1,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
889 this may happen when folding case in weird Turkish locales where
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
890 dotless i/dotted I are not included in the chosen character set.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
891 Return whether a bit was set in the charclass. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
892 static bool
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
893 setbit_wc (wint_t wc, charclass *c)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
894 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
895 int b = wctob (wc);
18619
5ceb90ef64b1 dfa: minor performance tweak
Paul Eggert <eggert@cs.ucla.edu>
parents: 18618
diff changeset
896 if (b < 0)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
897 return false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
898
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
899 setbit (b, c);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
900 return true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
901 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
902
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
903 /* Set a bit for B and its case variants in the charclass C.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
904 MB_CUR_MAX must be 1. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
905 static void
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
906 setbit_case_fold_c (int b, charclass *c)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
907 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
908 int ub = toupper (b);
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
909 for (int i = 0; i < NOTCHAR; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
910 if (toupper (i) == ub)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
911 setbit (i, c);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
912 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
913
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
914 /* Return true if the locale is compatible with the C locale. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
915
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
916 static bool
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
917 using_simple_locale (bool multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
918 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
919 /* The native character set is known to be compatible with
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
920 the C locale. The following test isn't perfect, but it's good
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
921 enough in practice, as only ASCII and EBCDIC are in common use
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
922 and this test correctly accepts ASCII and rejects EBCDIC. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
923 enum { native_c_charset =
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
924 ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
925 && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
926 && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
927 && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
928 && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
929 && '<' == 60 && '=' == 61 && '>' == 62 && '?' == 63 && 'A' == 65
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
930 && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
931 && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
932 && '}' == 125 && '~' == 126)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
933 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
934
18519
702a2d8ac274 dfa: fix logic typo
Paul Eggert <eggert@cs.ucla.edu>
parents: 18444
diff changeset
935 if (!native_c_charset || multibyte)
702a2d8ac274 dfa: fix logic typo
Paul Eggert <eggert@cs.ucla.edu>
parents: 18444
diff changeset
936 return false;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
937 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
938 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
939 /* Treat C and POSIX locales as being compatible. Also, treat
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
940 errors as compatible, as these are invariably from stubs. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
941 char const *loc = setlocale (LC_ALL, NULL);
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
942 return !loc || streq (loc, "C") || streq (loc, "POSIX");
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
943 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
944 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
945
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
946 /* Fetch the next lexical input character from the pattern. There
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
947 must be at least one byte of pattern input. Set DFA->lex.wctok to the
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
948 value of the character or to WEOF depending on whether the input is
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
949 a valid multibyte character (possibly of length 1). Then return
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
950 the next input byte value, except return EOF if the input is a
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
951 multibyte character of length greater than 1. */
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
952 static int
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
953 fetch_wc (struct dfa *dfa)
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
954 {
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
955 size_t nbytes = mbs_to_wchar (&dfa->lex.wctok, dfa->lex.ptr, dfa->lex.left,
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
956 dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
957 dfa->lex.cur_mb_len = nbytes;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
958 int c = nbytes == 1 ? to_uchar (dfa->lex.ptr[0]) : EOF;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
959 dfa->lex.ptr += nbytes;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
960 dfa->lex.left -= nbytes;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
961 return c;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
962 }
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
963
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
964 /* If there is no more input, report an error about unbalanced brackets.
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
965 Otherwise, behave as with fetch_wc (DFA). */
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
966 static int
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
967 bracket_fetch_wc (struct dfa *dfa)
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
968 {
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
969 if (! dfa->lex.left)
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
970 dfaerror (_("unbalanced ["));
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
971 return fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
972 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
973
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
974 typedef int predicate (int);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
975
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
976 /* The following list maps the names of the Posix named character classes
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
977 to predicate functions that determine whether a given character is in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
978 the class. The leading [ has already been eaten by the lexical
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
979 analyzer. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
980 struct dfa_ctype
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
981 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
982 const char *name;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
983 predicate *func;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
984 bool single_byte_only;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
985 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
986
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
987 static const struct dfa_ctype prednames[] = {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
988 {"alpha", isalpha, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
989 {"upper", isupper, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
990 {"lower", islower, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
991 {"digit", isdigit, true},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
992 {"xdigit", isxdigit, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
993 {"space", isspace, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
994 {"punct", ispunct, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
995 {"alnum", isalnum, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
996 {"print", isprint, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
997 {"graph", isgraph, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
998 {"cntrl", iscntrl, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
999 {"blank", isblank, false},
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1000 {NULL, NULL, false}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1001 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1002
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Look up STR in the prednames table by exact name match (streq).
   Return a pointer to the matching entry, or NULL if the scan reaches
   the terminating entry whose name is NULL without finding a match.  */
1003 static const struct dfa_ctype *_GL_ATTRIBUTE_PURE
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1004 find_pred (const char *str)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1005 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1006 for (unsigned int i = 0; prednames[i].name; ++i)
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
1007 if (streq (str, prednames[i].name))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1008 return &prednames[i];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1009 return NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1010 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1011
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1012 /* Parse a bracket expression, which possibly includes multibyte
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1013 characters. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1014 static token
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1015 parse_bracket_exp (struct dfa *dfa)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1016 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1017 /* This is a bracket expression that dfaexec is known to
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1018 process correctly. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1019 bool known_bracket_exp = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1020
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1021 /* Used to warn about [:space:].
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1022 Bit 0 = first character is a colon.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1023 Bit 1 = last character is a colon.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1024 Bit 2 = includes any other character but a colon.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1025 Bit 3 = includes ranges, char/equiv classes or collation elements. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1026 int colon_warning_state;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1027
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1028 dfa->lex.brack.nchars = 0;
18633
42cabb9832cd dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18632
diff changeset
1029 charclass ccl;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1030 zeroset (&ccl);
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1031 int c = bracket_fetch_wc (dfa);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
1032 bool invert = c == '^';
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
1033 if (invert)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1034 {
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1035 c = bracket_fetch_wc (dfa);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1036 known_bracket_exp = dfa->simple_locale;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1037 }
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1038 wint_t wc = dfa->lex.wctok;
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1039 int c1;
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1040 wint_t wc1;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1041 colon_warning_state = (c == ':');
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1042 do
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1043 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1044 c1 = NOTCHAR; /* Mark c1 as not initialized. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1045 colon_warning_state &= ~2;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1046
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1047 /* Note that if we're looking at some other [:...:] construct,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1048 we just treat it as a bunch of ordinary characters. We can do
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1049 this because we assume regex has checked for syntax errors before
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1050 dfa is ever called. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1051 if (c == '[')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1052 {
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1053 c1 = bracket_fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1054 wc1 = dfa->lex.wctok;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1055
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1056 if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1057 || c1 == '.' || c1 == '=')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1058 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1059 enum { MAX_BRACKET_STRING_LEN = 32 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1060 char str[MAX_BRACKET_STRING_LEN + 1];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1061 size_t len = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1062 for (;;)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1063 {
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1064 c = bracket_fetch_wc (dfa);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1065 if (dfa->lex.left == 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1066 || (c == c1 && dfa->lex.ptr[0] == ']'))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1067 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1068 if (len < MAX_BRACKET_STRING_LEN)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1069 str[len++] = c;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1070 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1071 /* This is in any case an invalid class name. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1072 str[0] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1073 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1074 str[len] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1075
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1076 /* Fetch bracket. */
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1077 c = bracket_fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1078 wc = dfa->lex.wctok;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1079 if (c1 == ':')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1080 /* Build character class. POSIX allows character
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1081 classes to match multicharacter collating elements,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1082 but the regex code does not support that, so do not
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1083 worry about that possibility. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1084 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1085 char const *class
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
1086 = (dfa->syntax.case_fold && (streq (str, "upper")
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
1087 || streq (str, "lower"))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1088 ? "alpha" : str);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1089 const struct dfa_ctype *pred = find_pred (class);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1090 if (!pred)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1091 dfaerror (_("invalid character class"));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1092
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1093 if (dfa->localeinfo.multibyte && !pred->single_byte_only)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1094 known_bracket_exp = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1095 else
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1096 for (int c2 = 0; c2 < NOTCHAR; ++c2)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1097 if (pred->func (c2))
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1098 setbit (c2, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1099 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1100 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1101 known_bracket_exp = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1102
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1103 colon_warning_state |= 8;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1104
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1105 /* Fetch new lookahead character. */
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1106 c1 = bracket_fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1107 wc1 = dfa->lex.wctok;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1108 continue;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1109 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1110
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1111 /* We treat '[' as a normal character here. c/c1/wc/wc1
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1112 are already set up. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1113 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1114
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1115 if (c == '\\'
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1116 && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1117 {
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1118 c = bracket_fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1119 wc = dfa->lex.wctok;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1120 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1121
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1122 if (c1 == NOTCHAR)
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1123 {
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1124 c1 = bracket_fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1125 wc1 = dfa->lex.wctok;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1126 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1127
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1128 if (c1 == '-')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1129 /* build range characters. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1130 {
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1131 int c2 = bracket_fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1132 wint_t wc2 = dfa->lex.wctok;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1133
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1134 /* A bracket expression like [a-[.aa.]] matches an unknown set.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1135 Treat it like [-a[.aa.]] while parsing it, and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1136 remember that the set is unknown. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1137 if (c2 == '[' && dfa->lex.ptr[0] == '.')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1138 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1139 known_bracket_exp = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1140 c2 = ']';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1141 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1142
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1143 if (c2 == ']')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1144 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1145 /* In the case [x-], the - is an ordinary hyphen,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1146 which is left in c1, the lookahead character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1147 dfa->lex.ptr -= dfa->lex.cur_mb_len;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1148 dfa->lex.left += dfa->lex.cur_mb_len;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1149 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1150 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1151 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1152 if (c2 == '\\' && (dfa->syntax.syntax_bits
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1153 & RE_BACKSLASH_ESCAPE_IN_LISTS))
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1154 {
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1155 c2 = bracket_fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1156 wc2 = dfa->lex.wctok;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1157 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1158
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1159 colon_warning_state |= 8;
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1160 c1 = bracket_fetch_wc (dfa);
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1161 wc1 = dfa->lex.wctok;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1162
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1163 /* Treat [x-y] as a range if x != y. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1164 if (wc != wc2 || wc == WEOF)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1165 {
18752
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1166 if (dfa->simple_locale
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1167 || (isasciidigit (c) & isasciidigit (c2)))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1168 {
18752
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1169 for (int ci = c; ci <= c2; ci++)
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1170 if (dfa->syntax.case_fold && isalpha (ci))
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1171 setbit_case_fold_c (ci, &ccl);
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1172 else
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1173 setbit (ci, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1174 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1175 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1176 known_bracket_exp = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1177
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1178 continue;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1179 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1180 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1181 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1182
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1183 colon_warning_state |= (c == ':') ? 2 : 4;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1184
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1185 if (!dfa->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1186 {
18752
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1187 if (dfa->syntax.case_fold && isalpha (c))
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1188 setbit_case_fold_c (c, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1189 else
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1190 setbit (c, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1191 continue;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1192 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1193
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1194 if (wc == WEOF)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1195 known_bracket_exp = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1196 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1197 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1198 wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1199 unsigned int n = (dfa->syntax.case_fold
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1200 ? case_folded_counterparts (wc, folded + 1) + 1
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1201 : 1);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1202 folded[0] = wc;
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1203 for (unsigned int i = 0; i < n; i++)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1204 if (!setbit_wc (folded[i], &ccl))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1205 {
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1206 dfa->lex.brack.chars
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1207 = maybe_realloc (dfa->lex.brack.chars, dfa->lex.brack.nchars,
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1208 &dfa->lex.brack.nchars_alloc, -1,
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1209 sizeof *dfa->lex.brack.chars);
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1210 dfa->lex.brack.chars[dfa->lex.brack.nchars++] = folded[i];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1211 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1212 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1213 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1214 while ((wc = wc1, (c = c1) != ']'));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1215
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1216 if (colon_warning_state == 7)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1217 dfawarn (_("character class syntax is [[:space:]], not [:space:]"));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1218
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1219 if (! known_bracket_exp)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1220 return BACKREF;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1221
18752
82d233900292 dfa: make [0-9] faster in non-C locales
Paul Eggert <eggert@cs.ucla.edu>
parents: 18679
diff changeset
1222 if (dfa->localeinfo.multibyte && (invert || dfa->lex.brack.nchars != 0))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1223 {
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1224 dfa->lex.brack.invert = invert;
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1225 dfa->lex.brack.cset = emptyset (&ccl) ? -1 : charclass_index (dfa, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1226 return MBCSET;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1227 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1228
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1229 if (invert)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1230 {
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1231 notset (&ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1232 if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1233 clrbit ('\n', &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1234 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1235
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1236 return CSET + charclass_index (dfa, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1237 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1238
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* A saved lexer input position.  push_lex_state stores the lexer's
   current position in one of these before redirecting the lexer, and
   pop_lex_state copies it back afterwards.  */
struct lexptr
{
  const char *ptr;      /* Saved copy of dfa->lex.ptr.  */
  size_t left;          /* Saved copy of dfa->lex.left.  */
};
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1244
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1245 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1246 push_lex_state (struct dfa *dfa, struct lexptr *ls, char const *s)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1247 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1248 ls->ptr = dfa->lex.ptr;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1249 ls->left = dfa->lex.left;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1250 dfa->lex.ptr = s;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1251 dfa->lex.left = strlen (s);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1252 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1253
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1254 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1255 pop_lex_state (struct dfa *dfa, struct lexptr const *ls)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1256 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1257 dfa->lex.ptr = ls->ptr;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1258 dfa->lex.left = ls->left;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1259 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1260
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1261 static token
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1262 lex (struct dfa *dfa)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1263 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1264 bool backslash = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1265
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1266 /* Basic plan: We fetch a character. If it's a backslash,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1267 we set the backslash flag and go through the loop again.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1268 On the plus side, this avoids having a duplicate of the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1269 main switch inside the backslash case. On the minus side,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1270 it means that just about every case begins with
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1271 "if (backslash) ...". */
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1272 for (int i = 0; i < 2; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1273 {
18634
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1274 if (! dfa->lex.left)
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1275 return dfa->lex.lasttok = END;
a2fc5a686baf dfa: prefer functions to FETCH_WC macro
Paul Eggert <eggert@cs.ucla.edu>
parents: 18633
diff changeset
1276 int c = fetch_wc (dfa);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1277
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1278 switch (c)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1279 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1280 case '\\':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1281 if (backslash)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1282 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1283 if (dfa->lex.left == 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1284 dfaerror (_("unfinished \\ escape"));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1285 backslash = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1286 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1287
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1288 case '^':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1289 if (backslash)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1290 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1291 if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1292 || dfa->lex.lasttok == END || dfa->lex.lasttok == LPAREN
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1293 || dfa->lex.lasttok == OR)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1294 return dfa->lex.lasttok = BEGLINE;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1295 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1296
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1297 case '$':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1298 if (backslash)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1299 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1300 if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1301 || dfa->lex.left == 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1302 || ((dfa->lex.left
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1303 > !(dfa->syntax.syntax_bits & RE_NO_BK_PARENS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1304 && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_PARENS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1305 & (dfa->lex.ptr[0] == '\\')]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1306 == ')'))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1307 || ((dfa->lex.left
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1308 > !(dfa->syntax.syntax_bits & RE_NO_BK_VBAR))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1309 && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_VBAR)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1310 & (dfa->lex.ptr[0] == '\\')]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1311 == '|'))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1312 || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1313 && dfa->lex.left > 0 && dfa->lex.ptr[0] == '\n'))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1314 return dfa->lex.lasttok = ENDLINE;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1315 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1316
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1317 case '1':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1318 case '2':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1319 case '3':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1320 case '4':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1321 case '5':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1322 case '6':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1323 case '7':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1324 case '8':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1325 case '9':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1326 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1327 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1328 dfa->lex.laststart = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1329 return dfa->lex.lasttok = BACKREF;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1330 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1331 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1332
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1333 case '`':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1334 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1335 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1336 /* FIXME: should be beginning of string */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1337 return dfa->lex.lasttok = BEGLINE;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1338 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1339 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1340
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1341 case '\'':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1342 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1343 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1344 /* FIXME: should be end of string */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1345 return dfa->lex.lasttok = ENDLINE;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1346 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1347 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1348
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1349 case '<':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1350 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1351 return dfa->lex.lasttok = BEGWORD;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1352 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1353
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1354 case '>':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1355 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1356 return dfa->lex.lasttok = ENDWORD;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1357 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1358
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1359 case 'b':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1360 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1361 return dfa->lex.lasttok = LIMWORD;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1362 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1363
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1364 case 'B':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1365 if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1366 return dfa->lex.lasttok = NOTLIMWORD;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1367 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1368
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1369 case '?':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1370 if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1371 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1372 if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1373 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1374 if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1375 && dfa->lex.laststart)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1376 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1377 return dfa->lex.lasttok = QMARK;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1378
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1379 case '*':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1380 if (backslash)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1381 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1382 if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1383 && dfa->lex.laststart)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1384 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1385 return dfa->lex.lasttok = STAR;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1386
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1387 case '+':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1388 if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1389 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1390 if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1391 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1392 if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1393 && dfa->lex.laststart)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1394 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1395 return dfa->lex.lasttok = PLUS;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1396
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1397 case '{':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1398 if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1399 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1400 if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1401 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1402 if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1403 && dfa->lex.laststart)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1404 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1405
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1406 /* Cases:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1407 {M} - exact count
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1408 {M,} - minimum count, maximum is infinity
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1409 {,N} - 0 through N
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1410 {,} - 0 to infinity (same as '*')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1411 {M,N} - M through N */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1412 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1413 char const *p = dfa->lex.ptr;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1414 char const *lim = p + dfa->lex.left;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1415 dfa->lex.minrep = dfa->lex.maxrep = -1;
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
1416 for (; p != lim && isasciidigit (*p); p++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1417 dfa->lex.minrep = (dfa->lex.minrep < 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1418 ? *p - '0'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1419 : MIN (RE_DUP_MAX + 1,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1420 dfa->lex.minrep * 10 + *p - '0'));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1421 if (p != lim)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1422 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1423 if (*p != ',')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1424 dfa->lex.maxrep = dfa->lex.minrep;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1425 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1426 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1427 if (dfa->lex.minrep < 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1428 dfa->lex.minrep = 0;
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
1429 while (++p != lim && isasciidigit (*p))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1430 dfa->lex.maxrep
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1431 = (dfa->lex.maxrep < 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1432 ? *p - '0'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1433 : MIN (RE_DUP_MAX + 1,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1434 dfa->lex.maxrep * 10 + *p - '0'));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1435 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1436 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1437 if (! ((! backslash || (p != lim && *p++ == '\\'))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1438 && p != lim && *p++ == '}'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1439 && 0 <= dfa->lex.minrep
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1440 && (dfa->lex.maxrep < 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1441 || dfa->lex.minrep <= dfa->lex.maxrep)))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1442 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1443 if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1444 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1445 dfaerror (_("invalid content of \\{\\}"));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1446 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1447 if (RE_DUP_MAX < dfa->lex.maxrep)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1448 dfaerror (_("regular expression too big"));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1449 dfa->lex.ptr = p;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1450 dfa->lex.left = lim - p;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1451 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1452 dfa->lex.laststart = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1453 return dfa->lex.lasttok = REPMN;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1454
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1455 case '|':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1456 if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1457 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1458 if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1459 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1460 dfa->lex.laststart = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1461 return dfa->lex.lasttok = OR;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1462
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1463 case '\n':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1464 if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1465 || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1466 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1467 dfa->lex.laststart = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1468 return dfa->lex.lasttok = OR;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1469
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1470 case '(':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1471 if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1472 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1473 dfa->lex.parens++;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1474 dfa->lex.laststart = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1475 return dfa->lex.lasttok = LPAREN;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1476
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1477 case ')':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1478 if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1479 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1480 if (dfa->lex.parens == 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1481 && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1482 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1483 dfa->lex.parens--;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1484 dfa->lex.laststart = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1485 return dfa->lex.lasttok = RPAREN;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1486
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1487 case '.':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1488 if (backslash)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1489 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1490 if (dfa->canychar == (size_t) -1)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1491 {
18633
42cabb9832cd dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18632
diff changeset
1492 charclass ccl;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1493 fillset (&ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1494 if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1495 clrbit ('\n', &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1496 if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1497 clrbit ('\0', &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1498 if (dfa->localeinfo.multibyte)
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1499 for (int c2 = 0; c2 < NOTCHAR; c2++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1500 if (dfa->localeinfo.sbctowc[c2] == WEOF)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1501 clrbit (c2, &ccl);
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1502 dfa->canychar = charclass_index (dfa, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1503 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1504 dfa->lex.laststart = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1505 return dfa->lex.lasttok = (dfa->localeinfo.multibyte
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1506 ? ANYCHAR
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1507 : CSET + dfa->canychar);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1508
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1509 case 's':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1510 case 'S':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1511 if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1512 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1513 if (!dfa->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1514 {
18633
42cabb9832cd dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18632
diff changeset
1515 charclass ccl;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1516 zeroset (&ccl);
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1517 for (int c2 = 0; c2 < NOTCHAR; ++c2)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1518 if (isspace (c2))
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1519 setbit (c2, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1520 if (c == 'S')
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1521 notset (&ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1522 dfa->lex.laststart = false;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1523 return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1524 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1525
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1526 /* FIXME: see if optimizing this, as is done with ANYCHAR and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1527 add_utf8_anychar, makes sense. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1528
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1529 /* \s and \S are documented to be equivalent to [[:space:]] and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1530 [^[:space:]] respectively, so tell the lexer to process those
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1531 strings, each minus its "already processed" '['. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1532 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1533 struct lexptr ls;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1534 push_lex_state (dfa, &ls, &"^[:space:]]"[c == 's']);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1535 dfa->lex.lasttok = parse_bracket_exp (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1536 pop_lex_state (dfa, &ls);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1537 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1538
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1539 dfa->lex.laststart = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1540 return dfa->lex.lasttok;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1541
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1542 case 'w':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1543 case 'W':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1544 if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1545 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1546
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1547 if (!dfa->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1548 {
18633
42cabb9832cd dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18632
diff changeset
1549 charclass ccl;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1550 zeroset (&ccl);
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1551 for (int c2 = 0; c2 < NOTCHAR; ++c2)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1552 if (dfa->syntax.sbit[c2] == CTX_LETTER)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1553 setbit (c2, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1554 if (c == 'W')
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1555 notset (&ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1556 dfa->lex.laststart = false;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1557 return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1558 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1559
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1560 /* FIXME: see if optimizing this, as is done with ANYCHAR and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1561 add_utf8_anychar, makes sense. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1562
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1563 /* \w and \W are documented to be equivalent to [_[:alnum:]] and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1564 [^_[:alnum:]] respectively, so tell the lexer to process those
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1565 strings, each minus its "already processed" '['. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1566 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1567 struct lexptr ls;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1568 push_lex_state (dfa, &ls, &"^_[:alnum:]]"[c == 'w']);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1569 dfa->lex.lasttok = parse_bracket_exp (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1570 pop_lex_state (dfa, &ls);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1571 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1572
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1573 dfa->lex.laststart = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1574 return dfa->lex.lasttok;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1575
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1576 case '[':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1577 if (backslash)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1578 goto normal_char;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1579 dfa->lex.laststart = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1580 return dfa->lex.lasttok = parse_bracket_exp (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1581
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1582 default:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1583 normal_char:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1584 dfa->lex.laststart = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1585 /* For multibyte character sets, folding is done in atom. Always
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1586 return WCHAR. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1587 if (dfa->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1588 return dfa->lex.lasttok = WCHAR;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1589
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1590 if (dfa->syntax.case_fold && isalpha (c))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1591 {
18633
42cabb9832cd dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18632
diff changeset
1592 charclass ccl;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1593 zeroset (&ccl);
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1594 setbit_case_fold_c (c, &ccl);
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1595 return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1596 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1597
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1598 return dfa->lex.lasttok = c;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1599 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1600 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1601
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1602 /* The above loop should consume at most a backslash
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1603 and some other character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1604 abort ();
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1605 return END; /* keeps pedantic compilers happy. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1606 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1607
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1608 static void
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1609 addtok_mb (struct dfa *dfa, token t, char mbprop)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1610 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1611 if (dfa->talloc == dfa->tindex)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1612 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1613 dfa->tokens = x2nrealloc (dfa->tokens, &dfa->talloc,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1614 sizeof *dfa->tokens);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1615 if (dfa->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1616 dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1617 sizeof *dfa->multibyte_prop);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1618 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1619 if (dfa->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1620 dfa->multibyte_prop[dfa->tindex] = mbprop;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1621 dfa->tokens[dfa->tindex++] = t;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1622
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1623 switch (t)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1624 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1625 case QMARK:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1626 case STAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1627 case PLUS:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1628 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1629
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1630 case CAT:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1631 case OR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1632 dfa->parse.depth--;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1633 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1634
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1635 case BACKREF:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1636 dfa->fast = false;
18914
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
1637 FALLTHROUGH;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1638 default:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1639 dfa->nleaves++;
18914
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
1640 FALLTHROUGH;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1641 case EMPTY:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1642 dfa->parse.depth++;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1643 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1644 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1645 if (dfa->parse.depth > dfa->depth)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1646 dfa->depth = dfa->parse.depth;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1647 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1648
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1649 static void addtok_wc (struct dfa *dfa, wint_t wc);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1650
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1651 /* Add the given token to the parse tree, maintaining the depth count and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1652 updating the maximum depth if necessary. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1653 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1654 addtok (struct dfa *dfa, token t)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1655 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1656 if (dfa->localeinfo.multibyte && t == MBCSET)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1657 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1658 bool need_or = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1659
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1660 /* Extract wide characters into alternations for better performance.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1661 This does not require UTF-8. */
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
1662 for (ptrdiff_t i = 0; i < dfa->lex.brack.nchars; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1663 {
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1664 addtok_wc (dfa, dfa->lex.brack.chars[i]);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1665 if (need_or)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1666 addtok (dfa, OR);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1667 need_or = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1668 }
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1669 dfa->lex.brack.nchars = 0;
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1670
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1671 /* Wide characters have been handled above, so it is possible
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1672 that the set is empty now. Do nothing in that case. */
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1673 if (dfa->lex.brack.cset != -1)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1674 {
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
1675 addtok (dfa, CSET + dfa->lex.brack.cset);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1676 if (need_or)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1677 addtok (dfa, OR);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1678 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1679 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1680 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1681 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1682 addtok_mb (dfa, t, 3);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1683 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1684 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1685
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1686 /* We treat a multibyte character as a single atom, so that DFA
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1687 can treat a multibyte character as a single expression.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1688
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1689 e.g., we construct the following tree from "<mb1><mb2>".
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1690 <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1691 <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT> */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1692 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1693 addtok_wc (struct dfa *dfa, wint_t wc)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1694 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1695 unsigned char buf[MB_LEN_MAX];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1696 mbstate_t s = { 0 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1697 size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1698
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1699 if (stored_bytes != (size_t) -1)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1700 dfa->lex.cur_mb_len = stored_bytes;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1701 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1702 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1703 /* This is merely stop-gap. buf[0] is undefined, yet skipping
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1704 the addtok_mb call altogether can corrupt the heap. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1705 dfa->lex.cur_mb_len = 1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1706 buf[0] = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1707 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1708
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1709 addtok_mb (dfa, buf[0], dfa->lex.cur_mb_len == 1 ? 3 : 1);
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1710 for (int i = 1; i < dfa->lex.cur_mb_len; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1711 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1712 addtok_mb (dfa, buf[i], i == dfa->lex.cur_mb_len - 1 ? 2 : 0);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1713 addtok (dfa, CAT);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1714 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1715 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1716
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1717 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1718 add_utf8_anychar (struct dfa *dfa)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1719 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1720 static charclass const utf8_classes[5] = {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1721 /* 80-bf: non-leading bytes. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1722 CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0xffffffff, 0, 0),
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1723
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1724 /* 00-7f: 1-byte sequence. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1725 CHARCLASS_INIT (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 0),
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1726
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1727 /* c2-df: 2-byte sequence. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1728 CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0xfffffffc, 0),
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1729
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1730 /* e0-ef: 3-byte sequence. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1731 CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xffff),
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1732
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1733 /* f0-f7: 4-byte sequence. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1734 CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xff0000)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1735 };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1736 const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1737
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1738 /* Define the five character classes that are needed below. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1739 if (dfa->utf8_anychar_classes[0] == 0)
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1740 for (unsigned int i = 0; i < n; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1741 {
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1742 charclass c = utf8_classes[i];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1743 if (i == 1)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1744 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1745 if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1746 clrbit ('\n', &c);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1747 if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1748 clrbit ('\0', &c);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1749 }
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
1750 dfa->utf8_anychar_classes[i] = CSET + charclass_index (dfa, &c);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1751 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1752
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1753 /* A valid UTF-8 character is
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1754
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1755 ([0x00-0x7f]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1756 |[0xc2-0xdf][0x80-0xbf]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1757 |[0xe0-0xef][0x80-0xbf][0x80-0xbf]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1758 |[0xf0-0xf7][0x80-0xbf][0x80-0xbf][0x80-0xbf])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1759
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1760 which I'll write more concisely "B|CA|DAA|EAAA". Factor the [0x80-0xbf]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1761 and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1762 Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1763 unsigned int i;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1764 for (i = 1; i < n; i++)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1765 addtok (dfa, dfa->utf8_anychar_classes[i]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1766 while (--i > 1)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1767 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1768 addtok (dfa, dfa->utf8_anychar_classes[0]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1769 addtok (dfa, CAT);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1770 addtok (dfa, OR);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1771 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1772 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1773
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1774 /* The grammar understood by the parser is as follows.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1775
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1776 regexp:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1777 regexp OR branch
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1778 branch
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1779
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1780 branch:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1781 branch closure
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1782 closure
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1783
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1784 closure:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1785 closure QMARK
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1786 closure STAR
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1787 closure PLUS
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1788 closure REPMN
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1789 atom
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1790
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1791 atom:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1792 <normal character>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1793 <multibyte character>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1794 ANYCHAR
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1795 MBCSET
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1796 CSET
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1797 BACKREF
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1798 BEGLINE
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1799 ENDLINE
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1800 BEGWORD
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1801 ENDWORD
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1802 LIMWORD
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1803 NOTLIMWORD
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1804 LPAREN regexp RPAREN
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1805 <empty>
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1806
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1807 The parser builds a parse tree in postfix form in an array of tokens. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1808
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1809 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1810 atom (struct dfa *dfa)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1811 {
39857
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1812 if ((0 <= dfa->parse.tok && dfa->parse.tok < NOTCHAR)
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1813 || dfa->parse.tok >= CSET
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1814 || dfa->parse.tok == BEG || dfa->parse.tok == BACKREF
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1815 || dfa->parse.tok == BEGLINE || dfa->parse.tok == ENDLINE
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1816 || dfa->parse.tok == BEGWORD || dfa->parse.tok == ENDWORD
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1817 || dfa->parse.tok == LIMWORD || dfa->parse.tok == NOTLIMWORD
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1818 || dfa->parse.tok == ANYCHAR || dfa->parse.tok == MBCSET)
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1819 {
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1820 if (dfa->parse.tok == ANYCHAR && dfa->localeinfo.using_utf8)
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1821 {
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1822 /* For UTF-8 expand the period to a series of CSETs that define a
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1823 valid UTF-8 character. This avoids using the slow multibyte
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1824 path. I'm pretty sure it would be both profitable and correct to
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1825 do it for any encoding; however, the optimization must be done
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1826 manually as it is done above in add_utf8_anychar. So, let's
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1827 start with UTF-8: it is the most used, and the structure of the
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1828 encoding makes the correctness more obvious. */
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1829 add_utf8_anychar (dfa);
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1830 }
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1831 else
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1832 addtok (dfa, dfa->parse.tok);
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1833 dfa->parse.tok = lex (dfa);
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1834 }
b0bc3272b80e dfa: reorder enum for efficiency
Paul Eggert <eggert@cs.ucla.edu>
parents: 39856
diff changeset
1835 else if (dfa->parse.tok == WCHAR)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1836 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1837 if (dfa->lex.wctok == WEOF)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1838 addtok (dfa, BACKREF);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1839 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1840 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1841 addtok_wc (dfa, dfa->lex.wctok);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1842
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1843 if (dfa->syntax.case_fold)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1844 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1845 wchar_t folded[CASE_FOLDED_BUFSIZE];
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1846 unsigned int n = case_folded_counterparts (dfa->lex.wctok,
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1847 folded);
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1848 for (unsigned int i = 0; i < n; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1849 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1850 addtok_wc (dfa, folded[i]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1851 addtok (dfa, OR);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1852 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1853 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1854 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1855
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1856 dfa->parse.tok = lex (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1857 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1858 else if (dfa->parse.tok == LPAREN)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1859 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1860 dfa->parse.tok = lex (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1861 regexp (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1862 if (dfa->parse.tok != RPAREN)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1863 dfaerror (_("unbalanced ("));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1864 dfa->parse.tok = lex (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1865 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1866 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1867 addtok (dfa, EMPTY);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1868 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1869
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1870 /* Return the number of tokens in the given subexpression. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1871 static size_t _GL_ATTRIBUTE_PURE
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1872 nsubtoks (struct dfa const *dfa, size_t tindex)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1873 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1874 switch (dfa->tokens[tindex - 1])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1875 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1876 default:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1877 return 1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1878 case QMARK:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1879 case STAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1880 case PLUS:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1881 return 1 + nsubtoks (dfa, tindex - 1);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1882 case CAT:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1883 case OR:
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
1884 {
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
1885 size_t ntoks1 = nsubtoks (dfa, tindex - 1);
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
1886 return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1);
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
1887 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1888 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1889 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1890
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1891 /* Copy the given subexpression to the top of the tree. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1892 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1893 copytoks (struct dfa *dfa, size_t tindex, size_t ntokens)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1894 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1895 if (dfa->localeinfo.multibyte)
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1896 for (size_t i = 0; i < ntokens; ++i)
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1897 addtok_mb (dfa, dfa->tokens[tindex + i],
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1898 dfa->multibyte_prop[tindex + i]);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1899 else
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1900 for (size_t i = 0; i < ntokens; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1901 addtok_mb (dfa, dfa->tokens[tindex + i], 3);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1902 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1903
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1904 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1905 closure (struct dfa *dfa)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1906 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1907 atom (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1908 while (dfa->parse.tok == QMARK || dfa->parse.tok == STAR
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1909 || dfa->parse.tok == PLUS || dfa->parse.tok == REPMN)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1910 if (dfa->parse.tok == REPMN && (dfa->lex.minrep || dfa->lex.maxrep))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1911 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1912 size_t ntokens = nsubtoks (dfa, dfa->tindex);
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1913 size_t tindex = dfa->tindex - ntokens;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1914 if (dfa->lex.maxrep < 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1915 addtok (dfa, PLUS);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1916 if (dfa->lex.minrep == 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1917 addtok (dfa, QMARK);
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
1918 int i;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1919 for (i = 1; i < dfa->lex.minrep; i++)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1920 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1921 copytoks (dfa, tindex, ntokens);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1922 addtok (dfa, CAT);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1923 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1924 for (; i < dfa->lex.maxrep; i++)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1925 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1926 copytoks (dfa, tindex, ntokens);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1927 addtok (dfa, QMARK);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1928 addtok (dfa, CAT);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1929 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1930 dfa->parse.tok = lex (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1931 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1932 else if (dfa->parse.tok == REPMN)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1933 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1934 dfa->tindex -= nsubtoks (dfa, dfa->tindex);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1935 dfa->parse.tok = lex (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1936 closure (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1937 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1938 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1939 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1940 addtok (dfa, dfa->parse.tok);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1941 dfa->parse.tok = lex (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1942 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1943 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1944
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1945 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1946 branch (struct dfa* dfa)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1947 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1948 closure (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1949 while (dfa->parse.tok != RPAREN && dfa->parse.tok != OR
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1950 && dfa->parse.tok >= 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1951 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1952 closure (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1953 addtok (dfa, CAT);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1954 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1955 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1956
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1957 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1958 regexp (struct dfa *dfa)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1959 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1960 branch (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1961 while (dfa->parse.tok == OR)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1962 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1963 dfa->parse.tok = lex (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1964 branch (dfa);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1965 addtok (dfa, OR);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1966 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1967 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1968
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1969 /* Main entry point for the parser. S is a string to be parsed, LEN is the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1970 length of the string, so S can include NUL characters. D is a pointer to
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1971 the struct dfa to parse into. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1972 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1973 dfaparse (char const *s, size_t len, struct dfa *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1974 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1975 d->lex.ptr = s;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1976 d->lex.left = len;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1977 d->lex.lasttok = END;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1978 d->lex.laststart = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1979
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1980 if (!d->syntax.syntax_bits_set)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1981 dfaerror (_("no syntax specified"));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1982
39855
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
1983 if (!d->nregexps)
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
1984 addtok (d, BEG);
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
1985
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1986 d->parse.tok = lex (d);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1987 d->parse.depth = d->depth;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1988
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1989 regexp (d);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1990
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1991 if (d->parse.tok != END)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1992 dfaerror (_("unbalanced )"));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1993
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1994 addtok (d, END - d->nregexps);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1995 addtok (d, CAT);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1996
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1997 if (d->nregexps)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1998 addtok (d, OR);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
1999
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2000 ++d->nregexps;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2001 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2002
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2003 /* Some primitives for operating on sets of positions. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2004
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2005 /* Copy one set to another. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2006 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2007 copy (position_set const *src, position_set *dst)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2008 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2009 if (dst->alloc < src->nelem)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2010 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2011 free (dst->elems);
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2012 dst->elems = xpalloc (NULL, &dst->alloc, src->nelem - dst->alloc, -1,
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2013 sizeof *dst->elems);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2014 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2015 dst->nelem = src->nelem;
18673
8321b350b6d7 dfa: port to gcc -fsanitize=undefined
Paul Eggert <eggert@cs.ucla.edu>
parents: 18668
diff changeset
2016 if (src->nelem != 0)
8321b350b6d7 dfa: port to gcc -fsanitize=undefined
Paul Eggert <eggert@cs.ucla.edu>
parents: 18668
diff changeset
2017 memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2018 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2019
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2020 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2021 alloc_position_set (position_set *s, size_t size)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2022 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2023 s->elems = xnmalloc (size, sizeof *s->elems);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2024 s->alloc = size;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2025 s->nelem = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2026 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2027
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2028 /* Insert position P in set S. S is maintained in sorted order on
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2029 decreasing index. If there is already an entry in S with P.index
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2030 then merge (logically-OR) P's constraints into the one in S.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2031 S->elems must point to an array large enough to hold the resulting set. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2032 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2033 insert (position p, position_set *s)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2034 {
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2035 ptrdiff_t count = s->nelem;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2036 ptrdiff_t lo = 0, hi = count;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2037 while (lo < hi)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2038 {
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2039 ptrdiff_t mid = (lo + hi) >> 1;
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2040 if (s->elems[mid].index < p.index)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2041 lo = mid + 1;
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2042 else if (s->elems[mid].index == p.index)
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2043 {
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2044 s->elems[mid].constraint |= p.constraint;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2045 return;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2046 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2047 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2048 hi = mid;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2049 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2050
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2051 s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2052 for (ptrdiff_t i = count; i > lo; i--)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2053 s->elems[i] = s->elems[i - 1];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2054 s->elems[lo] = p;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2055 ++s->nelem;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2056 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2057
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2058 static void
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2059 append (position p, position_set *s)
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2060 {
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2061 ptrdiff_t count = s->nelem;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2062 s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2063 s->elems[s->nelem++] = p;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2064 }
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2065
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2066 /* Merge S1 and S2 (with the additional constraint C2) into M. The
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2067 result is as if the positions of S1, and of S2 with the additional
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2068 constraint C2, were inserted into an initially empty set. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2069 static void
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2070 merge_constrained (position_set const *s1, position_set const *s2,
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2071 unsigned int c2, position_set *m)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2072 {
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2073 ptrdiff_t i = 0, j = 0;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2074
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2075 if (m->alloc - s1->nelem < s2->nelem)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2076 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2077 free (m->elems);
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2078 m->alloc = s1->nelem;
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2079 m->elems = xpalloc (NULL, &m->alloc, s2->nelem, -1, sizeof *m->elems);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2080 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2081 m->nelem = 0;
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2082 while (i < s1->nelem || j < s2->nelem)
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2083 if (! (j < s2->nelem)
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2084 || (i < s1->nelem && s1->elems[i].index <= s2->elems[j].index))
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2085 {
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2086 unsigned int c = ((i < s1->nelem && j < s2->nelem
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2087 && s1->elems[i].index == s2->elems[j].index)
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2088 ? s2->elems[j++].constraint & c2
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2089 : 0);
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2090 m->elems[m->nelem].index = s1->elems[i].index;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2091 m->elems[m->nelem++].constraint = s1->elems[i++].constraint | c;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2092 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2093 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2094 {
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2095 if (s2->elems[j].constraint & c2)
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2096 {
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2097 m->elems[m->nelem].index = s2->elems[j].index;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2098 m->elems[m->nelem++].constraint = s2->elems[j].constraint & c2;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2099 }
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2100 j++;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2101 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2102 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2103
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2104 /* Merge two sets of positions into a third. The result is exactly as if
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2105 the positions of both sets were inserted into an initially empty set. */
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2106 static void
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2107 merge (position_set const *s1, position_set const *s2, position_set *m)
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2108 {
18651
645e8282eb81 dfa: fix 'return' typo
Paul Eggert <eggert@cs.ucla.edu>
parents: 18634
diff changeset
2109 merge_constrained (s1, s2, -1, m);
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2110 }
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2111
39862
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2112 static void
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2113 merge2 (position_set *dst, position_set const *src, position_set *m)
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2114 {
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2115 if (src->nelem < 4)
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2116 {
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2117 for (ptrdiff_t i = 0; i < src->nelem; ++i)
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2118 insert (src->elems[i], dst);
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2119 }
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2120 else
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2121 {
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2122 merge (src, dst, m);
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2123 copy (m, dst);
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2124 }
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2125 }
f61cd4b41f21 dfa: optimization for state merge
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39861
diff changeset
2126
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2127 /* Delete a position from a set. Return the nonzero constraint of the
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2128 deleted position, or zero if there was no such position. */
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2129 static unsigned int
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2130 delete (size_t del, position_set *s)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2131 {
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2132 size_t count = s->nelem;
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2133 size_t lo = 0, hi = count;
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2134 while (lo < hi)
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2135 {
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2136 size_t mid = (lo + hi) >> 1;
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2137 if (s->elems[mid].index < del)
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2138 lo = mid + 1;
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2139 else if (s->elems[mid].index == del)
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2140 {
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2141 unsigned int c = s->elems[mid].constraint;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2142 size_t i;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2143 for (i = mid; i + 1 < count; i++)
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2144 s->elems[i] = s->elems[i + 1];
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2145 s->nelem = i;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2146 return c;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2147 }
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2148 else
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2149 hi = mid;
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2150 }
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2151 return 0;
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2152 }
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2153
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2154 /* Replace a position with the followed set. */
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2155 static void
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2156 replace (position_set *dst, size_t del, position_set *add,
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2157 unsigned int constraint, position_set *tmp)
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2158 {
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2159 unsigned int c = delete (del, dst) & constraint;
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2160
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2161 if (c)
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2162 {
18608
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2163 copy (dst, tmp);
4e21be41ec70 dfa: improve worst-case 'replace' performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 18607
diff changeset
2164 merge_constrained (tmp, add, c, dst);
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2165 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2166 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2167
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2168 /* Find the index of the state corresponding to the given position set with
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2169 the given preceding context, or create a new state if there is no such
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2170 state. Context tells whether we got here on a newline or letter. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2171 static state_num
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2172 state_index (struct dfa *d, position_set const *s, int context)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2173 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2174 size_t hash = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2175 int constraint = 0;
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2176 state_num i;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2177 token first_end = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2178
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2179 for (i = 0; i < s->nelem; ++i)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2180 hash ^= s->elems[i].index + s->elems[i].constraint;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2181
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2182 /* Try to find a state that exactly matches the proposed one. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2183 for (i = 0; i < d->sindex; ++i)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2184 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2185 if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2186 || context != d->states[i].context)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2187 continue;
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2188 state_num j;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2189 for (j = 0; j < s->nelem; ++j)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2190 if (s->elems[j].constraint != d->states[i].elems.elems[j].constraint
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2191 || s->elems[j].index != d->states[i].elems.elems[j].index)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2192 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2193 if (j == s->nelem)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2194 return i;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2195 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2196
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2197 #ifdef DEBUG
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2198 fprintf (stderr, "new state %zd\n nextpos:", i);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2199 for (state_num j = 0; j < s->nelem; j++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2200 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2201 fprintf (stderr, " %zu:", s->elems[j].index);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2202 prtok (d->tokens[s->elems[j].index]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2203 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2204 fprintf (stderr, "\n context:");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2205 if (context ^ CTX_ANY)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2206 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2207 if (context & CTX_NONE)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2208 fprintf (stderr, " CTX_NONE");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2209 if (context & CTX_LETTER)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2210 fprintf (stderr, " CTX_LETTER");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2211 if (context & CTX_NEWLINE)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2212 fprintf (stderr, " CTX_NEWLINE");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2213 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2214 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2215 fprintf (stderr, " CTX_ANY");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2216 fprintf (stderr, "\n");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2217 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2218
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2219 for (state_num j = 0; j < s->nelem; j++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2220 {
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2221 int c = d->constraints[s->elems[j].index];
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2222
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2223 if (c != 0)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2224 {
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
2225 if (succeeds_in_context (c, context, CTX_ANY))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2226 constraint |= c;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2227 if (!first_end)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2228 first_end = d->tokens[s->elems[j].index];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2229 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2230 else if (d->tokens[s->elems[j].index] == BACKREF)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2231 constraint = NO_CONSTRAINT;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2232 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2233
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2234
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2235 /* Create a new state. */
18559
900819251d51 dfa: fix some unlikely integer overflows
Paul Eggert <eggert@cs.ucla.edu>
parents: 18558
diff changeset
2236 d->states = maybe_realloc (d->states, d->sindex, &d->salloc, -1,
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2237 sizeof *d->states);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2238 d->states[i].hash = hash;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2239 alloc_position_set (&d->states[i].elems, s->nelem);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2240 copy (s, &d->states[i].elems);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2241 d->states[i].context = context;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2242 d->states[i].constraint = constraint;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2243 d->states[i].first_end = first_end;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2244 d->states[i].mbps.nelem = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2245 d->states[i].mbps.elems = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2246 d->states[i].mb_trindex = -1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2247
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2248 ++d->sindex;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2249
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2250 return i;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2251 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2252
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2253 /* Find the epsilon closure of a set of positions. If any position of the set
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2254 contains a symbol that matches the empty string in some context, replace
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2255 that position with the elements of its follow labeled with an appropriate
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2256 constraint. Repeat exhaustively until no funny positions are left.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2257 S->elems must be large enough to hold the result. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2258 static void
39953
2f4c84e23e3c dfa: remove unneeded code
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39862
diff changeset
2259 epsclosure (struct dfa const *d)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2260 {
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2261 position_set tmp;
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2262 alloc_position_set (&tmp, d->nleaves);
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2263 for (size_t i = 0; i < d->tindex; ++i)
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2264 if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2265 && d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2266 && d->tokens[i] != MBCSET && d->tokens[i] < CSET)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2267 {
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2268 unsigned int constraint;
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2269 switch (d->tokens[i])
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2270 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2271 case BEGLINE:
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2272 constraint = BEGLINE_CONSTRAINT;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2273 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2274 case ENDLINE:
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2275 constraint = ENDLINE_CONSTRAINT;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2276 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2277 case BEGWORD:
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2278 constraint = BEGWORD_CONSTRAINT;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2279 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2280 case ENDWORD:
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2281 constraint = ENDWORD_CONSTRAINT;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2282 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2283 case LIMWORD:
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2284 constraint = LIMWORD_CONSTRAINT;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2285 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2286 case NOTLIMWORD:
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2287 constraint = NOTLIMWORD_CONSTRAINT;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2288 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2289 default:
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2290 constraint = NO_CONSTRAINT;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2291 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2292 }
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2293
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2294 delete (i, &d->follows[i]);
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2295
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2296 for (size_t j = 0; j < d->tindex; j++)
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2297 if (i != j && d->follows[j].nelem > 0)
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2298 replace (&d->follows[j], i, &d->follows[i], constraint, &tmp);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2299 }
18679
a68d8ef26d2a dfa: fix memory leak in parse
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18673
diff changeset
2300 free (tmp.elems);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2301 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2302
40047
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2303 /* Returns the set of contexts for which there is at least one
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2304 character included in C. */
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2305
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2306 static int
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2307 charclass_context (struct dfa const *dfa, charclass const *c)
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2308 {
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2309 int context = 0;
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2310
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2311 for (unsigned int j = 0; j < CHARCLASS_WORDS; ++j)
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2312 {
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2313 if (c->w[j] & dfa->syntax.newline.w[j])
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2314 context |= CTX_NEWLINE;
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2315 if (c->w[j] & dfa->syntax.letters.w[j])
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2316 context |= CTX_LETTER;
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2317 if (c->w[j] & ~(dfa->syntax.letters.w[j] | dfa->syntax.newline.w[j]))
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2318 context |= CTX_NONE;
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2319 }
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2320
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2321 return context;
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2322 }
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
2323
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2324 /* Returns the contexts on which the position set S depends. Each context
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2325 in the set of returned contexts (let's call it SC) may have a different
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2326 follow set than other contexts in SC, and also different from the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2327 follow set of the complement set (sc ^ CTX_ANY). However, all contexts
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2328 in the complement set will have the same follow set. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2329
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2330 static int _GL_ATTRIBUTE_PURE
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2331 state_separate_contexts (struct dfa *d, position_set const *s)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2332 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2333 int separate_contexts = 0;
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2334
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2335 for (size_t j = 0; j < s->nelem; j++)
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2336 separate_contexts |= d->separates[s->elems[j].index];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2337
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2338 return separate_contexts;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2339 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2340
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
/* Per-token flag bits used while optimizing the NFA.  */
enum
{
  /* The token by itself is repeated; such a token is kept distinct
     from a non-repeated one.  */
  OPT_REPEAT = 0x01,

  /* A sequence of several tokens is repeated.  The flag is set on the
     first token of the sequence.  The node is not merged.  */
  OPT_LPAREN = 0x02,

  /* Several branches join at this node.  The node is not merged.  */
  OPT_RPAREN = 0x04,

  /* The node has been walked.  Finding it again during the walk turns
     on its OPT_RPAREN flag.  */
  OPT_WALKED = 0x08,

  /* The node has been queued and is not queued a second time.  */
  OPT_QUEUED = 0x10
};
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2360
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2361 static void
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2362 merge_nfa_state (struct dfa *d, size_t tindex, char *flags,
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2363 position_set *merged)
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2364 {
39858
f1a9693c37be dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents: 39857
diff changeset
2365 position_set *follows = d->follows;
f1a9693c37be dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents: 39857
diff changeset
2366 ptrdiff_t nelem = 0;
f1a9693c37be dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents: 39857
diff changeset
2367
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2368 d->constraints[tindex] = 0;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2369
39858
f1a9693c37be dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents: 39857
diff changeset
2370 for (ptrdiff_t i = 0; i < follows[tindex].nelem; i++)
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2371 {
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2372 size_t sindex = follows[tindex].elems[i].index;
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2373
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2374 /* Skip the node as pruned in future. */
39858
f1a9693c37be dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents: 39857
diff changeset
2375 unsigned int iconstraint = follows[tindex].elems[i].constraint;
f1a9693c37be dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents: 39857
diff changeset
2376 if (iconstraint == 0)
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2377 continue;
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2378
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2379 if (d->tokens[follows[tindex].elems[i].index] <= END)
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2380 {
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2381 d->constraints[tindex] |= follows[tindex].elems[i].constraint;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2382 continue;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2383 }
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2384
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2385 if (!(flags[sindex] & (OPT_LPAREN | OPT_RPAREN)))
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2386 {
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2387 ptrdiff_t j;
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2388
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2389 for (j = 0; j < nelem; j++)
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2390 {
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2391 size_t dindex = follows[tindex].elems[j].index;
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2392
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2393 if (follows[tindex].elems[j].constraint != iconstraint)
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2394 continue;
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2395
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2396 if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN))
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2397 continue;
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2398
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2399 if (d->tokens[sindex] != d->tokens[dindex])
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2400 continue;
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2401
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2402 if ((flags[sindex] ^ flags[dindex]) & OPT_REPEAT)
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2403 continue;
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2404
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2405 if (flags[sindex] & OPT_REPEAT)
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2406 delete (sindex, &follows[sindex]);
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2407
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2408 merge2 (&follows[dindex], &follows[sindex], merged);
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2409
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2410 break;
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2411 }
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2412
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2413 if (j < nelem)
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2414 continue;
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2415 }
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2416
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2417 follows[tindex].elems[nelem++] = follows[tindex].elems[i];
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2418 flags[sindex] |= OPT_QUEUED;
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2419 }
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2420
39858
f1a9693c37be dfa: prune states as we go
Paul Eggert <eggert@cs.ucla.edu>
parents: 39857
diff changeset
2421 follows[tindex].nelem = nelem;
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2422 }
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2423
39957
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2424 static int
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2425 compare (const void *a, const void *b)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2426 {
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2427 int aindex;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2428 int bindex;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2429
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2430 aindex = (int) ((position *) a)->index;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2431 bindex = (int) ((position *) b)->index;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2432
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2433 return aindex - bindex;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2434 }
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2435
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2436 static void
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2437 reorder_tokens (struct dfa *d)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2438 {
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2439 ptrdiff_t nleaves;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2440 ptrdiff_t *map;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2441 token *tokens;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2442 position_set *follows;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2443 int *constraints;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2444 char *multibyte_prop;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2445
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2446 nleaves = 0;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2447
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2448 map = xnmalloc (d->tindex, sizeof *map);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2449
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2450 map[0] = nleaves++;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2451
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2452 for (ptrdiff_t i = 1; i < d->tindex; i++)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2453 map[i] = -1;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2454
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2455 tokens = xnmalloc (d->nleaves, sizeof *tokens);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2456 follows = xnmalloc (d->nleaves, sizeof *follows);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2457 constraints = xnmalloc (d->nleaves, sizeof *constraints);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2458
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2459 if (d->localeinfo.multibyte)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2460 multibyte_prop = xnmalloc (d->nleaves, sizeof *multibyte_prop);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2461 else
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2462 multibyte_prop = NULL;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2463
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2464 for (ptrdiff_t i = 0; i < d->tindex; i++)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2465 {
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2466 if (map[i] == -1)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2467 {
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2468 free (d->follows[i].elems);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2469 d->follows[i].elems = NULL;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2470 d->follows[i].nelem = 0;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2471 continue;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2472 }
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2473
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2474 tokens[map[i]] = d->tokens[i];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2475 follows[map[i]] = d->follows[i];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2476 constraints[map[i]] = d->constraints[i];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2477
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2478 if (multibyte_prop != NULL)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2479 multibyte_prop[map[i]] = d->multibyte_prop[i];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2480
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2481 for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2482 {
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2483 if (map[d->follows[i].elems[j].index] == -1)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2484 map[d->follows[i].elems[j].index] = nleaves++;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2485
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2486 d->follows[i].elems[j].index = map[d->follows[i].elems[j].index];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2487 }
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2488
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2489 qsort (d->follows[i].elems, d->follows[i].nelem,
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2490 sizeof *d->follows[i].elems, compare);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2491 }
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2492
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2493 for (ptrdiff_t i = 0; i < nleaves; i++)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2494 {
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2495 d->tokens[i] = tokens[i];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2496 d->follows[i] = follows[i];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2497 d->constraints[i] = constraints[i];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2498
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2499 if (multibyte_prop != NULL)
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2500 d->multibyte_prop[i] = multibyte_prop[i];
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2501 }
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2502
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2503 d->tindex = d->nleaves = nleaves;
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2504
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2505 free (tokens);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2506 free (follows);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2507 free (constraints);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2508 free (multibyte_prop);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2509 free (map);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2510 }
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2511
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2512 static void
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2513 dfaoptimize (struct dfa *d)
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2514 {
39859
1f2a63e46815 dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 39858
diff changeset
2515 char *flags;
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2516 position_set merged0;
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2517 position_set *merged;
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2518
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2519 flags = xmalloc (d->tindex * sizeof *flags);
39859
1f2a63e46815 dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 39858
diff changeset
2520 memset (flags, 0, d->tindex * sizeof *flags);
1f2a63e46815 dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 39858
diff changeset
2521
1f2a63e46815 dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 39858
diff changeset
2522 for (size_t i = 0; i < d->tindex; i++)
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2523 {
39859
1f2a63e46815 dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 39858
diff changeset
2524 for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++)
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2525 {
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2526 if (d->follows[i].elems[j].index == i)
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2527 flags[d->follows[i].elems[j].index] |= OPT_REPEAT;
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2528 else if (d->follows[i].elems[j].index < i)
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2529 flags[d->follows[i].elems[j].index] |= OPT_LPAREN;
39859
1f2a63e46815 dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 39858
diff changeset
2530 else if (flags[d->follows[i].elems[j].index] &= OPT_WALKED)
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2531 flags[d->follows[i].elems[j].index] |= OPT_RPAREN;
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2532 else
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2533 flags[d->follows[i].elems[j].index] |= OPT_WALKED;
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2534 }
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2535 }
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2536
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2537 flags[0] |= OPT_QUEUED;
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2538
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2539 merged = &merged0;
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2540 alloc_position_set (merged, d->nleaves);
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2541
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2542 d->constraints = xnmalloc (d->tindex, sizeof *d->constraints);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2543
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2544 for (ptrdiff_t i = 0; i < d->tindex; i++)
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2545 if (flags[i] & OPT_QUEUED)
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2546 merge_nfa_state (d, i, flags, merged);
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2547
39957
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2548 reorder_tokens (d);
6d6c0b94693c dfa: reorder tokens before execution
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39956
diff changeset
2549
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2550 free (merged->elems);
39955
7c568600d07f dfa: simplify dfa optimization
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39954
diff changeset
2551 free (flags);
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2552 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2553
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2554 /* Perform bottom-up analysis on the parse tree, computing various functions.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2555 Note that at this point, we're pretending constructs like \< are real
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2556 characters rather than constraints on what can follow them.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2557
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2558 Nullable: A node is nullable if it is at the root of a regexp that can
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2559 match the empty string.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2560 * EMPTY leaves are nullable.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2561 * No other leaf is nullable.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2562 * A QMARK or STAR node is nullable.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2563 * A PLUS node is nullable if its argument is nullable.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2564 * A CAT node is nullable if both its arguments are nullable.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2565 * An OR node is nullable if either argument is nullable.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2566
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2567 Firstpos: The firstpos of a node is the set of positions (nonempty leaves)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2568 that could correspond to the first character of a string matching the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2569 regexp rooted at the given node.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2570 * EMPTY leaves have empty firstpos.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2571 * The firstpos of a nonempty leaf is that leaf itself.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2572 * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2573 argument.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2574 * The firstpos of a CAT node is the firstpos of the left argument, union
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2575 the firstpos of the right if the left argument is nullable.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2576 * The firstpos of an OR node is the union of firstpos of each argument.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2577
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2578 Lastpos: The lastpos of a node is the set of positions that could
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2579 correspond to the last character of a string matching the regexp at
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2580 the given node.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2581 * EMPTY leaves have empty lastpos.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2582 * The lastpos of a nonempty leaf is that leaf itself.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2583 * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2584 argument.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2585 * The lastpos of a CAT node is the lastpos of its right argument, union
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2586 the lastpos of the left if the right argument is nullable.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2587 * The lastpos of an OR node is the union of the lastpos of each argument.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2588
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2589 Follow: The follow of a position is the set of positions that could
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2590 correspond to the character following a character matching the node in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2591 a string matching the regexp. At this point we consider special symbols
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2592 that match the empty string in some context to be just normal characters.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2593 Later, if we find that a special symbol is in a follow set, we will
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2594 replace it with the elements of its follow, labeled with an appropriate
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2595 constraint.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2596 * Every node in the firstpos of the argument of a STAR or PLUS node is in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2597 the follow of every node in the lastpos.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2598 * Every node in the firstpos of the second argument of a CAT node is in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2599 the follow of every node in the lastpos of the first argument.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2600
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2601 Because of the postfix representation of the parse tree, the depth-first
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2602 analysis is conveniently done by a linear scan with the aid of a stack.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2603 Sets are stored as arrays of the elements, obeying a stack-like allocation
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2604 scheme; the number of elements in each set deeper in the stack can be
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2605 used to determine the address of a particular set's array. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2606 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2607 dfaanalyze (struct dfa *d, bool searchflag)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2608 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2609 /* Array allocated to hold position sets. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2610 position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2611 /* Firstpos and lastpos elements. */
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2612 position *firstpos = posalloc;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2613 position *lastpos = firstpos + d->nleaves;
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2614 position pos;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2615 position_set tmp;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2616
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2617 /* Stack for element counts and nullable flags. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2618 struct
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2619 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2620 /* Whether the entry is nullable. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2621 bool nullable;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2622
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2623 /* Counts of firstpos and lastpos sets. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2624 size_t nfirstpos;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2625 size_t nlastpos;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2626 } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2627
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2628 position_set merged; /* Result of merging sets. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2629
39855
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
2630 addtok (d, CAT);
a29036ff511d dfa: simplify initial state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39760
diff changeset
2631
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2632 #ifdef DEBUG
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2633 fprintf (stderr, "dfaanalyze:\n");
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
2634 for (size_t i = 0; i < d->tindex; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2635 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2636 fprintf (stderr, " %zu:", i);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2637 prtok (d->tokens[i]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2638 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2639 putc ('\n', stderr);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2640 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2641
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2642 d->searchflag = searchflag;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2643 alloc_position_set (&merged, d->nleaves);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2644 d->follows = xcalloc (d->tindex, sizeof *d->follows);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2645
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
2646 for (size_t i = 0; i < d->tindex; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2647 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2648 switch (d->tokens[i])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2649 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2650 case EMPTY:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2651 /* The empty set is nullable. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2652 stk->nullable = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2653
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2654 /* The firstpos and lastpos of the empty leaf are both empty. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2655 stk->nfirstpos = stk->nlastpos = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2656 stk++;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2657 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2658
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2659 case STAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2660 case PLUS:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2661 /* Every element in the firstpos of the argument is in the follow
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2662 of every element in the lastpos. */
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2663 {
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2664 tmp.elems = firstpos - stk[-1].nfirstpos;
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2665 tmp.nelem = stk[-1].nfirstpos;
40026
c51e38088432 dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents: 39958
diff changeset
2666 position *p = lastpos - stk[-1].nlastpos;
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2667 for (size_t j = 0; j < stk[-1].nlastpos; j++)
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2668 {
40026
c51e38088432 dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents: 39958
diff changeset
2669 merge (&tmp, &d->follows[p[j].index], &merged);
c51e38088432 dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents: 39958
diff changeset
2670 copy (&merged, &d->follows[p[j].index]);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2671 }
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2672 }
18914
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
2673 FALLTHROUGH;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2674 case QMARK:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2675 /* A QMARK or STAR node is automatically nullable. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2676 if (d->tokens[i] != PLUS)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2677 stk[-1].nullable = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2678 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2679
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2680 case CAT:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2681 /* Every element in the firstpos of the second argument is in the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2682 follow of every element in the lastpos of the first argument. */
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2683 {
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2684 tmp.nelem = stk[-1].nfirstpos;
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2685 tmp.elems = firstpos - stk[-1].nfirstpos;
40026
c51e38088432 dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents: 39958
diff changeset
2686 position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos;
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2687 for (size_t j = 0; j < stk[-2].nlastpos; j++)
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2688 {
40026
c51e38088432 dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents: 39958
diff changeset
2689 merge (&tmp, &d->follows[p[j].index], &merged);
c51e38088432 dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents: 39958
diff changeset
2690 copy (&merged, &d->follows[p[j].index]);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2691 }
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2692 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2693
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2694 /* The firstpos of a CAT node is the firstpos of the first argument,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2695 union that of the second argument if the first is nullable. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2696 if (stk[-2].nullable)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2697 stk[-2].nfirstpos += stk[-1].nfirstpos;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2698 else
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2699 firstpos -= stk[-1].nfirstpos;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2700
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2701 /* The lastpos of a CAT node is the lastpos of the second argument,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2702 union that of the first argument if the second is nullable. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2703 if (stk[-1].nullable)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2704 stk[-2].nlastpos += stk[-1].nlastpos;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2705 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2706 {
40026
c51e38088432 dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents: 39958
diff changeset
2707 position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos;
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2708 for (size_t j = 0; j < stk[-1].nlastpos; j++)
40026
c51e38088432 dfa: avoid new warnings from gcc
Jim Meyering <meyering@fb.com>
parents: 39958
diff changeset
2709 p[j] = p[j + stk[-2].nlastpos];
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2710 lastpos -= stk[-2].nlastpos;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2711 stk[-2].nlastpos = stk[-1].nlastpos;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2712 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2713
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2714 /* A CAT node is nullable if both arguments are nullable. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2715 stk[-2].nullable &= stk[-1].nullable;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2716 stk--;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2717 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2718
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2719 case OR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2720 /* The firstpos is the union of the firstpos of each argument. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2721 stk[-2].nfirstpos += stk[-1].nfirstpos;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2722
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2723 /* The lastpos is the union of the lastpos of each argument. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2724 stk[-2].nlastpos += stk[-1].nlastpos;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2725
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2726 /* An OR node is nullable if either argument is nullable. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2727 stk[-2].nullable |= stk[-1].nullable;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2728 stk--;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2729 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2730
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2731 default:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2732 /* Anything else is a nonempty position. (Note that special
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2733 constructs like \< are treated as nonempty strings here;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2734 an "epsilon closure" effectively makes them nullable later.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2735 Backreferences have to get a real position so we can detect
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2736 transitions on them later. But they are nullable.) */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2737 stk->nullable = d->tokens[i] == BACKREF;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2738
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2739 /* This position is in its own firstpos and lastpos. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2740 stk->nfirstpos = stk->nlastpos = 1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2741 stk++;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2742
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2743 firstpos->index = lastpos->index = i;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2744 firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2745 firstpos++, lastpos++;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2746
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2747 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2748 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2749 #ifdef DEBUG
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2750 /* ... balance the above nonsyntactic #ifdef goo... */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2751 fprintf (stderr, "node %zu:", i);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2752 prtok (d->tokens[i]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2753 putc ('\n', stderr);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2754 fprintf (stderr,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2755 stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2756 fprintf (stderr, " firstpos:");
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2757 for (size_t j = 0; j < stk[-1].nfirstpos; j++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2758 {
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2759 fprintf (stderr, " %zu:", firstpos[j - stk[-1].nfirstpos].index);
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2760 prtok (d->tokens[firstpos[j - stk[-1].nfirstpos].index]);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2761 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2762 fprintf (stderr, "\n lastpos:");
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2763 for (size_t j = 0; j < stk[-1].nlastpos; j++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2764 {
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2765 fprintf (stderr, " %zu:", lastpos[j - stk[-1].nlastpos].index);
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2766 prtok (d->tokens[lastpos[j - stk[-1].nlastpos].index]);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2767 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2768 putc ('\n', stderr);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2769 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2770 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2771
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2772 /* For each follow set that is the follow set of a real position, replace
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2773 it with its epsilon closure. */
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2774 epsclosure (d);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2775
39856
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2776 dfaoptimize (d);
469c01483bf1 dfa: optimize alternation in NFA
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39855
diff changeset
2777
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2778 #ifdef DEBUG
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
2779 for (size_t i = 0; i < d->tindex; ++i)
39953
2f4c84e23e3c dfa: remove unneeded code
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39862
diff changeset
2780 if (d->tokens[i] == BEG || d->tokens[i] < NOTCHAR
2f4c84e23e3c dfa: remove unneeded code
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39862
diff changeset
2781 || d->tokens[i] == BACKREF || d->tokens[i] == ANYCHAR
2f4c84e23e3c dfa: remove unneeded code
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39862
diff changeset
2782 || d->tokens[i] == MBCSET || d->tokens[i] >= CSET)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2783 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2784 fprintf (stderr, "follows(%zu:", i);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2785 prtok (d->tokens[i]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2786 fprintf (stderr, "):");
39954
b6666dd9d140 dfa: position set sorts increasing order
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39953
diff changeset
2787 for (size_t j = 0; j < d->follows[i].nelem; j++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2788 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2789 fprintf (stderr, " %zu:", d->follows[i].elems[j].index);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2790 prtok (d->tokens[d->follows[i].elems[j].index]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2791 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2792 putc ('\n', stderr);
18607
db280259d3cc dfa: performance improvement for removal of epsilon closure
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18560
diff changeset
2793 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2794 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2795
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2796 pos.index = 0;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2797 pos.constraint = NO_CONSTRAINT;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2798
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2799 alloc_position_set (&tmp, 1);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2800
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2801 append (pos, &tmp);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2802
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2803 d->separates = xnmalloc (d->tindex, sizeof *d->separates);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2804
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2805 for (ptrdiff_t i = 0; i < d->tindex; i++)
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2806 {
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2807 d->separates[i] = 0;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2808
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2809 if (prev_newline_dependent (d->constraints[i]))
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2810 d->separates[i] |= CTX_NEWLINE;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2811 if (prev_letter_dependent (d->constraints[i]))
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2812 d->separates[i] |= CTX_LETTER;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2813
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2814 for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++)
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2815 {
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2816 if (prev_newline_dependent (d->follows[i].elems[j].constraint))
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2817 d->separates[i] |= CTX_NEWLINE;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2818 if (prev_letter_dependent (d->follows[i].elems[j].constraint))
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2819 d->separates[i] |= CTX_LETTER;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2820 }
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2821 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2822
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2823 /* Context wanted by some position. */
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2824 int separate_contexts = state_separate_contexts (d, &tmp);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
2825
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2826 /* Build the initial state. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2827 if (separate_contexts & CTX_NEWLINE)
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2828 state_index (d, &tmp, CTX_NEWLINE);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2829 d->initstate_notbol = d->min_trcount
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2830 = state_index (d, &tmp, separate_contexts ^ CTX_ANY);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2831 if (separate_contexts & CTX_LETTER)
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2832 d->min_trcount = state_index (d, &tmp, CTX_LETTER);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2833 d->min_trcount++;
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2834 d->trcount = 0;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2835
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2836 free (posalloc);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2837 free (stkalloc);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2838 free (merged.elems);
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2839 free (tmp.elems);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2840 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2841
18658
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2842 /* Make sure D's state arrays are large enough to hold all D->sindex states. */
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2843 static void
18659
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
2844 realloc_trans_if_necessary (struct dfa *d)
18658
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2845 {
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2846 state_num oldalloc = d->tralloc;
18659
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
2847 if (oldalloc < d->sindex)
18658
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2848 {
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2849 state_num **realtrans = d->trans ? d->trans - 2 : NULL;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2850 ptrdiff_t newalloc1 = realtrans ? d->tralloc + 2 : 0;
18659
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
2851 realtrans = xpalloc (realtrans, &newalloc1, d->sindex - oldalloc,
18658
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2852 -1, sizeof *realtrans);
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2853 realtrans[0] = realtrans[1] = NULL;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2854 d->trans = realtrans + 2;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2855 ptrdiff_t newalloc = d->tralloc = newalloc1 - 2;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2856 d->fails = xnrealloc (d->fails, newalloc, sizeof *d->fails);
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2857 d->success = xnrealloc (d->success, newalloc, sizeof *d->success);
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2858 d->newlines = xnrealloc (d->newlines, newalloc, sizeof *d->newlines);
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2859 if (d->localeinfo.multibyte)
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2860 {
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2861 realtrans = d->mb_trans ? d->mb_trans - 2 : NULL;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2862 realtrans = xnrealloc (realtrans, newalloc1, sizeof *realtrans);
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2863 if (oldalloc == 0)
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2864 realtrans[0] = realtrans[1] = NULL;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2865 d->mb_trans = realtrans + 2;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2866 }
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2867 for (; oldalloc < newalloc; oldalloc++)
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2868 {
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2869 d->trans[oldalloc] = NULL;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2870 d->fails[oldalloc] = NULL;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2871 if (d->localeinfo.multibyte)
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2872 d->mb_trans[oldalloc] = NULL;
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2873 }
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2874 }
384886b3e35b dfa: fix reallocation bug when matching newlines
Paul Eggert <eggert@cs.ucla.edu>
parents: 18651
diff changeset
2875 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2876
18660
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2877 /*
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2878 Calculate the transition table for a new state derived from state s
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2879 for a compiled dfa d after input character uc, and return the new
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2880 state number.
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2881
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2882 Do not worry about all possible input characters; calculate just the group
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2883 of positions that match uc. Label it with the set of characters that
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2884 every position in the group matches (taking into account, if necessary,
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2885 preceding context information of s). Then find the union
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2886 of these positions' follows, i.e., the set of positions of the
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2887 new state. For each character in the group's label, set the transition
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2888 on this character to be to a state corresponding to the set's positions,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2889 and its associated backward context information, if necessary.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2890
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2891 When building a searching matcher, include the positions of state
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2892 0 in every state.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2893
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2894 The group is constructed by building an equivalence-class
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2895 partition of the positions of s.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2896
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2897 For each position, find the set of characters C that it matches. Eliminate
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2898 any characters from C that fail on grounds of backward context.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2899
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2900 Check whether the group's label L has nonempty
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2901 intersection with C. If L - C is nonempty, create a new group labeled
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2902 L - C and having the same positions as the current group, and set L to
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2903 the intersection of L and C. Insert the position in the group, set
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2904 C = C - L, and resume scanning.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2905
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2906 If after comparing with every group there are characters remaining in C,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2907 create a new group labeled with the characters of C and insert this
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2908 position in that group. */
18660
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2909
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2910 static state_num
18660
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2911 build_state (state_num s, struct dfa *d, unsigned char uc)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2912 {
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2913 position_set follows; /* Union of the follows for each
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2914 position of the current state. */
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2915 position_set group; /* Positions that match the input char. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2916 position_set tmp; /* Temporary space for merging sets. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2917 state_num state; /* New state. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2918 state_num state_newline; /* New state on a newline transition. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2919 state_num state_letter; /* New state on a letter transition. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2920
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2921 #ifdef DEBUG
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2922 fprintf (stderr, "build state %td\n", s);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2923 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2924
18660
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2925 /* A pointer to the new transition table, and the table itself. */
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2926 state_num **ptrans = (accepting (s, d) ? d->fails : d->trans) + s;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2927 state_num *trans = *ptrans;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2928
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2929 if (!trans)
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2930 {
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2931 /* MAX_TRCOUNT is an arbitrary upper limit on the number of
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2932 transition tables that can exist at once, other than for
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2933 initial states. Often-used transition tables are quickly
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2934 rebuilt, whereas rarely-used ones are cleared away. */
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2935 if (MAX_TRCOUNT <= d->trcount)
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2936 {
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2937 for (state_num i = d->min_trcount; i < d->tralloc; i++)
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2938 {
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2939 free (d->trans[i]);
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2940 free (d->fails[i]);
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2941 d->trans[i] = d->fails[i] = NULL;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2942 }
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2943 d->trcount = 0;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2944 }
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2945
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2946 d->trcount++;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2947 *ptrans = trans = xmalloc (NOTCHAR * sizeof *trans);
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2948
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2949 /* Fill transition table with a default value which means that the
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2950 transited state has not been calculated yet. */
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2951 for (int i = 0; i < NOTCHAR; i++)
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2952 trans[i] = -2;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2953 }
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2954
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2955 /* Set up the success bits for this state. */
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2956 d->success[s] = 0;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2957 if (accepts_in_context (d->states[s].context, CTX_NEWLINE, s, d))
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2958 d->success[s] |= CTX_NEWLINE;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2959 if (accepts_in_context (d->states[s].context, CTX_LETTER, s, d))
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2960 d->success[s] |= CTX_LETTER;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2961 if (accepts_in_context (d->states[s].context, CTX_NONE, s, d))
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2962 d->success[s] |= CTX_NONE;
9812ab19bd35 dfa: melt down dfastate into build_state
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18659
diff changeset
2963
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2964 alloc_position_set (&follows, d->nleaves);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2965
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2966 /* Find the union of the follows of the positions of the group.
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2967 This is a hideously inefficient loop. Fix it someday. */
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2968 for (size_t j = 0; j < d->states[s].elems.nelem; ++j)
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2969 for (size_t k = 0;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2970 k < d->follows[d->states[s].elems.elems[j].index].nelem; ++k)
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2971 insert (d->follows[d->states[s].elems.elems[j].index].elems[k],
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2972 &follows);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2973
18633
42cabb9832cd dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18632
diff changeset
2974 /* Positions that match the input char. */
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2975 alloc_position_set (&group, d->nleaves);
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2976
18633
42cabb9832cd dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18632
diff changeset
2977 /* The group's label. */
42cabb9832cd dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18632
diff changeset
2978 charclass label;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
2979 fillset (&label);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2980
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2981 for (size_t i = 0; i < follows.nelem; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2982 {
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
2983 charclass matches; /* Set of matching characters. */
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
2984 position pos = follows.elems[i];
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2985 bool matched = false;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2986 if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2987 {
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
2988 zeroset (&matches);
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
2989 setbit (d->tokens[pos.index], &matches);
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2990 if (d->tokens[pos.index] == uc)
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2991 matched = true;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2992 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2993 else if (d->tokens[pos.index] >= CSET)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
2994 {
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
2995 matches = d->charclasses[d->tokens[pos.index] - CSET];
18931
6daf1ec75a2e dfa: two small simplifications
Jim Meyering <meyering@fb.com>
parents: 18914
diff changeset
2996 if (tstbit (uc, &matches))
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2997 matched = true;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
2998 }
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
2999 else if (d->tokens[pos.index] == ANYCHAR)
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3000 {
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3001 matches = d->charclasses[d->canychar];
18931
6daf1ec75a2e dfa: two small simplifications
Jim Meyering <meyering@fb.com>
parents: 18914
diff changeset
3002 if (tstbit (uc, &matches))
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3003 matched = true;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3004
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3005 /* ANYCHAR must match a single character, so we must put
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3006 it in D->states[s].mbps, which contains the positions that
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3007 can match a single character rather than a byte. If none of the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3008 positions that have ANYCHAR depends on the context of the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3009 next character, we put the follows instead of it in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3010 D->states[s].mbps as an optimization. */
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
3011 if (succeeds_in_context (pos.constraint, d->states[s].context,
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3012 CTX_NONE))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3013 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3014 if (d->states[s].mbps.nelem == 0)
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3015 alloc_position_set (&d->states[s].mbps, 1);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3016 insert (pos, &d->states[s].mbps);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3017 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3018 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3019 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3020 continue;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3021
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3022 /* Some characters may need to be eliminated from matches because
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3023 they fail in the current context. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3024 if (pos.constraint != NO_CONSTRAINT)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3025 {
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
3026 if (!succeeds_in_context (pos.constraint,
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3027 d->states[s].context, CTX_NEWLINE))
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3028 for (size_t j = 0; j < CHARCLASS_WORDS; ++j)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3029 matches.w[j] &= ~d->syntax.newline.w[j];
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
3030 if (!succeeds_in_context (pos.constraint,
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3031 d->states[s].context, CTX_LETTER))
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3032 for (size_t j = 0; j < CHARCLASS_WORDS; ++j)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3033 matches.w[j] &= ~d->syntax.letters.w[j];
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
3034 if (!succeeds_in_context (pos.constraint,
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3035 d->states[s].context, CTX_NONE))
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3036 for (size_t j = 0; j < CHARCLASS_WORDS; ++j)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3037 matches.w[j] &= d->syntax.letters.w[j] | d->syntax.newline.w[j];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3038
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3039 /* If there are no characters left, there's no point in going on. */
18668
1fe5f10b4b1c dfa: minor simplification with emptyset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18667
diff changeset
3040 if (emptyset (&matches))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3041 continue;
18535
9c210050a97b dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents: 18534
diff changeset
3042
9c210050a97b dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents: 18534
diff changeset
3043 /* If we have reset the bit that made us declare "matched", reset
9c210050a97b dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents: 18534
diff changeset
3044 that indicator, too. This is required to avoid an infinite loop
9c210050a97b dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents: 18534
diff changeset
3045 with this command: echo cx | LC_ALL=C grep -E 'c\b[x ]' */
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3046 if (!tstbit (uc, &matches))
18535
9c210050a97b dfa: avoid new infinite loop
Jim Meyering <meyering@fb.com>
parents: 18534
diff changeset
3047 matched = false;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3048 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3049
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3050 #ifdef DEBUG
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3051 fprintf (stderr, " nextpos %zu:", pos.index);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3052 prtok (d->tokens[pos.index]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3053 fprintf (stderr, " of");
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3054 for (size_t j = 0; j < NOTCHAR; j++)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3055 if (tstbit (j, &matches))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3056 fprintf (stderr, " 0x%02zx", j);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3057 fprintf (stderr, "\n");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3058 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3059
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3060 if (matched)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3061 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3062 for (size_t k = 0; k < CHARCLASS_WORDS; ++k)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3063 label.w[k] &= matches.w[k];
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3064 append (pos, &group);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3065 }
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3066 else
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3067 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3068 for (size_t k = 0; k < CHARCLASS_WORDS; ++k)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3069 label.w[k] &= ~matches.w[k];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3070 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3071 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3072
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3073 alloc_position_set (&tmp, d->nleaves);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3074
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3075 if (group.nelem > 0)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3076 {
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3077 /* If we are building a searching matcher, throw in the positions
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3078 of state 0 as well, if possible. */
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3079 if (d->searchflag)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3080 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3081 /* If a token in follows.elems is not 1st byte of a multibyte
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3082 character, or the states of follows must accept the bytes
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3083 which are not 1st byte of the multibyte character.
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3084 Then, if a state of follows encounters a byte, it must not be
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3085 a 1st byte of a multibyte character nor a single byte character.
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3086 In this case, do not add state[0].follows to next state, because
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3087 state[0] must accept 1st-byte.
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3088
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3089 For example, suppose <sb a> is a certain single byte character,
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3090 <mb A> is a certain multibyte character, and the codepoint of
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3091 <sb a> equals the 2nd byte of the codepoint of <mb A>. When
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3092 state[0] accepts <sb a>, state[i] transits to state[i+1] by
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3093 accepting the 1st byte of <mb A>, and state[i+1] accepts the
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3094 2nd byte of <mb A>, if state[i+1] encounters the codepoint of
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3095 <sb a>, it must not be <sb a> but the 2nd byte of <mb A>, so do
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3096 not add state[0]. */
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3097
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3098 bool mergeit = !d->localeinfo.multibyte;
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3099 if (!mergeit)
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3100 {
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3101 mergeit = true;
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3102 for (size_t j = 0; mergeit && j < group.nelem; j++)
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3103 mergeit &= d->multibyte_prop[group.elems[j].index];
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3104 }
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3105 if (mergeit)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3106 {
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3107 merge (&d->states[0].elems, &group, &tmp);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3108 copy (&tmp, &group);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3109 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3110 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3111
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3112 /* Find out if the new state will want any context information,
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3113 by calculating possible contexts that the group can match,
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3114 and separate contexts that the new state wants to know. */
40047
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3115 int possible_contexts = charclass_context (d, &label);
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3116 int separate_contexts = state_separate_contexts (d, &group);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3117
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3118 /* Find the state(s) corresponding to the union of the follows. */
40047
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3119 if (possible_contexts & ~separate_contexts)
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3120 state = state_index (d, &group, separate_contexts ^ CTX_ANY);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3121 else
40047
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3122 state = -1;
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3123 if (separate_contexts & possible_contexts & CTX_NEWLINE)
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3124 state_newline = state_index (d, &group, CTX_NEWLINE);
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3125 else
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3126 state_newline = state;
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3127 if (separate_contexts & possible_contexts & CTX_LETTER)
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3128 state_letter = state_index (d, &group, CTX_LETTER);
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3129 else
183a2f6b0b16 revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
Jim Meyering <meyering@fb.com>
parents: 40026
diff changeset
3130 state_letter = state;
18659
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3131
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3132 /* Reallocate now, to reallocate any newline transition properly. */
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3133 realloc_trans_if_necessary (d);
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3134 }
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3135
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3136 /* If we are a searching matcher, the default transition is to a state
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3137 containing the positions of state 0, otherwise the default transition
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3138 is to fail miserably. */
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3139 else if (d->searchflag)
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3140 {
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3141 state_newline = 0;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3142 state_letter = d->min_trcount - 1;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3143 state = d->initstate_notbol;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3144 }
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3145 else
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3146 {
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3147 state_newline = -1;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3148 state_letter = -1;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3149 state = -1;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3150 }
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3151
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3152 /* Set the transitions for each character in the label. */
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3153 for (size_t i = 0; i < NOTCHAR; i++)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3154 if (tstbit (i, &label))
18659
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3155 switch (d->syntax.sbit[i])
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3156 {
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3157 case CTX_NEWLINE:
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3158 trans[i] = state_newline;
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3159 break;
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3160 case CTX_LETTER:
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3161 trans[i] = state_letter;
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3162 break;
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3163 default:
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3164 trans[i] = state;
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3165 break;
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3166 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3167
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3168 #ifdef DEBUG
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3169 fprintf (stderr, "trans table %td", s);
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3170 for (size_t i = 0; i < NOTCHAR; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3171 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3172 if (!(i & 0xf))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3173 fprintf (stderr, "\n");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3174 fprintf (stderr, " %2td", trans[i]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3175 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3176 fprintf (stderr, "\n");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3177 #endif
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3178
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3179 free (group.elems);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3180 free (follows.elems);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3181 free (tmp.elems);
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3182
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3183 /* Keep the newline transition in a special place so we can use it as
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3184 a sentinel. */
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3185 if (tstbit (d->syntax.eolbyte, &label))
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3186 {
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3187 d->newlines[s] = trans[d->syntax.eolbyte];
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3188 trans[d->syntax.eolbyte] = -1;
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3189 }
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3190
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3191 return trans[uc];
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3192 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3193
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3194 /* Multibyte character handling sub-routines for dfaexec. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3195
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3196 /* Consume a single byte and transit state from 's' to '*next_state'.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3197 This function is almost same as the state transition routin in dfaexec.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3198 But state transition is done just once, otherwise matching succeed or
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3199 reach the end of the buffer. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3200 static state_num
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3201 transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3202 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3203 state_num *t;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3204
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3205 if (d->trans[s])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3206 t = d->trans[s];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3207 else if (d->fails[s])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3208 t = d->fails[s];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3209 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3210 {
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3211 build_state (s, d, **pp);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3212 if (d->trans[s])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3213 t = d->trans[s];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3214 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3215 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3216 t = d->fails[s];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3217 assert (t);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3218 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3219 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3220
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3221 if (t[**pp] == -2)
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3222 build_state (s, d, **pp);
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3223
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3224 return t[*(*pp)++];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3225 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3226
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3227 /* Transit state from s, then return new state and update the pointer of
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3228 the buffer. This function is for a period operator which can match a
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3229 multi-byte character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3230 static state_num
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3231 transit_state (struct dfa *d, state_num s, unsigned char const **pp,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3232 unsigned char const *end)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3233 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3234 wint_t wc;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3235
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3236 int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3237
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3238 /* This state has some operators which can match a multibyte character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3239 d->mb_follows.nelem = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3240
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3241 /* Calculate the state which can be reached from the state 's' by
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3242 consuming 'mbclen' single bytes from the buffer. */
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3243 state_num s1 = s;
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3244 int mbci;
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3245 for (mbci = 0; mbci < mbclen && (mbci == 0 || d->min_trcount <= s); mbci++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3246 s = transit_state_singlebyte (d, s, pp);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3247 *pp += mbclen - mbci;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3248
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3249 if (wc == WEOF)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3250 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3251 /* It is an invalid character, so ANYCHAR is not accepted. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3252 return s;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3253 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3254
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3255 /* If all positions which have ANYCHAR do not depend on the context
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3256 of the next character, calculate the next state with
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3257 pre-calculated follows and cache the result. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3258 if (d->states[s1].mb_trindex < 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3259 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3260 if (MAX_TRCOUNT <= d->mb_trcount)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3261 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3262 state_num s3;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3263 for (s3 = -1; s3 < d->tralloc; s3++)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3264 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3265 free (d->mb_trans[s3]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3266 d->mb_trans[s3] = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3267 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3268
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3269 for (state_num i = 0; i < d->sindex; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3270 d->states[i].mb_trindex = -1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3271 d->mb_trcount = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3272 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3273 d->states[s1].mb_trindex = d->mb_trcount++;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3274 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3275
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3276 if (! d->mb_trans[s])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3277 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3278 enum { TRANSPTR_SIZE = sizeof *d->mb_trans[s] };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3279 enum { TRANSALLOC_SIZE = MAX_TRCOUNT * TRANSPTR_SIZE };
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3280 d->mb_trans[s] = xmalloc (TRANSALLOC_SIZE);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3281 for (int i = 0; i < MAX_TRCOUNT; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3282 d->mb_trans[s][i] = -1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3283 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3284 else if (d->mb_trans[s][d->states[s1].mb_trindex] >= 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3285 return d->mb_trans[s][d->states[s1].mb_trindex];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3286
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3287 if (s == -1)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3288 copy (&d->states[s1].mbps, &d->mb_follows);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3289 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3290 merge (&d->states[s1].mbps, &d->states[s].elems, &d->mb_follows);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3291
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3292 int separate_contexts = state_separate_contexts (d, &d->mb_follows);
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3293 state_num s2 = state_index (d, &d->mb_follows, separate_contexts ^ CTX_ANY);
18659
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3294 realloc_trans_if_necessary (d);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3295
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3296 d->mb_trans[s][d->states[s1].mb_trindex] = s2;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3297
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3298 return s2;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3299 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3300
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3301 /* The initial state may encounter a byte which is not a single byte character
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3302 nor the first byte of a multibyte character. But it is incorrect for the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3303 initial state to accept such a byte. For example, in Shift JIS the regular
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3304 expression "\\" accepts the codepoint 0x5c, but should not accept the second
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3305 byte of the codepoint 0x815c. Then the initial state must skip the bytes
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3306 that are not a single byte character nor the first byte of a multibyte
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3307 character.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3308
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3309 Given DFA state d, use mbs_to_wchar to advance MBP until it reaches
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3310 or exceeds P, and return the advanced MBP. If WCP is non-NULL and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3311 the result is greater than P, set *WCP to the final wide character
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3312 processed, or to WEOF if no wide character is processed. Otherwise,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3313 if WCP is non-NULL, *WCP may or may not be updated.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3314
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3315 Both P and MBP must be no larger than END. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3316 static unsigned char const *
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3317 skip_remains_mb (struct dfa *d, unsigned char const *p,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3318 unsigned char const *mbp, char const *end)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3319 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3320 if (d->syntax.never_trail[*p])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3321 return p;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3322 while (mbp < p)
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3323 {
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3324 wint_t wc;
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3325 mbp += mbs_to_wchar (&wc, (char const *) mbp,
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3326 end - (char const *) mbp, d);
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3327 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3328 return mbp;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3329 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3330
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3331 /* Search through a buffer looking for a match to the struct dfa *D.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3332 Find the first occurrence of a string matching the regexp in the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3333 buffer, and the shortest possible version thereof. Return a pointer to
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3334 the first character after the match, or NULL if none is found. BEGIN
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3335 points to the beginning of the buffer, and END points to the first byte
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3336 after its end. Note however that we store a sentinel byte (usually
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3337 newline) in *END, so the actual buffer must be one byte longer.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3338 When ALLOW_NL, newlines may appear in the matching string.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3339 If COUNT is non-NULL, increment *COUNT once for each newline processed.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3340 If MULTIBYTE, the input consists of multibyte characters and/or
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3341 encoding-error bytes. Otherwise, it consists of single-byte characters.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3342 Here is the list of features that make this DFA matcher punt:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3343 - [M-N] range in non-simple locale: regex is up to 25% faster on [a-z]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3344 - [^...] in non-simple locale
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3345 - [[=foo=]] or [[.foo.]]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3346 - [[:alpha:]] etc. in multibyte locale (except [[:digit:]] works OK)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3347 - back-reference: (.)\1
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3348 - word-delimiter in multibyte locale: \<, \>, \b, \B
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3349 See using_simple_locale for the definition of "simple locale". */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3350
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3351 static inline char *
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3352 dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3353 size_t *count, bool multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3354 {
18444
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3355 if (MAX_TRCOUNT <= d->sindex)
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3356 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3357 for (state_num s = d->min_trcount; s < d->sindex; s++)
18444
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3358 {
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3359 free (d->states[s].elems.elems);
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3360 free (d->states[s].mbps.elems);
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3361 }
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3362 d->sindex = d->min_trcount;
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3363
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3364 if (d->trans)
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3365 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3366 for (state_num s = 0; s < d->tralloc; s++)
18444
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3367 {
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3368 free (d->trans[s]);
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3369 free (d->fails[s]);
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3370 d->trans[s] = d->fails[s] = NULL;
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3371 }
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3372 d->trcount = 0;
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3373 }
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3374
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3375 if (d->localeinfo.multibyte && d->mb_trans)
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3376 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3377 for (state_num s = -1; s < d->tralloc; s++)
18444
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3378 {
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3379 free (d->mb_trans[s]);
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3380 d->mb_trans[s] = NULL;
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3381 }
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3382 for (state_num s = 0; s < d->min_trcount; s++)
18444
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3383 d->states[s].mb_trindex = -1;
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3384 d->mb_trcount = 0;
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3385 }
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3386 }
0f7103b2baf0 dfa: save memory for states
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18435
diff changeset
3387
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3388 if (!d->tralloc)
18659
161f38194efe dfa: simplify transition table allocation
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18658
diff changeset
3389 realloc_trans_if_necessary (d);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3390
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3391 /* Current state. */
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3392 state_num s = 0, s1 = 0;
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3393
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3394 /* Current input character. */
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3395 unsigned char const *p = (unsigned char const *) begin;
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3396 unsigned char const *mbp = p;
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
3397
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3398 /* Copy of d->trans so it can be optimized into a register. */
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3399 state_num **trans = d->trans;
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3400 unsigned char eol = d->syntax.eolbyte; /* Likewise for eolbyte. */
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3401 unsigned char saved_end = *(unsigned char *) end;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3402 *end = eol;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3403
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3404 if (multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3405 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3406 memset (&d->mbs, 0, sizeof d->mbs);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3407 if (d->mb_follows.alloc == 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3408 alloc_position_set (&d->mb_follows, d->nleaves);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3409 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3410
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3411 size_t nlcount = 0;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3412 for (;;)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3413 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3414 state_num *t;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3415 while ((t = trans[s]) != NULL)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3416 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3417 if (s < d->min_trcount)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3418 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3419 if (!multibyte || d->states[s].mbps.nelem == 0)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3420 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3421 while (t[*p] == s)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3422 p++;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3423 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3424 if (multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3425 p = mbp = skip_remains_mb (d, p, mbp, end);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3426 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3427
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3428 if (multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3429 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3430 s1 = s;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3431
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3432 if (d->states[s].mbps.nelem == 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3433 || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3434 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3435 /* If an input character does not match ANYCHAR, do it
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3436 like a single-byte character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3437 s = t[*p++];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3438 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3439 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3440 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3441 s = transit_state (d, s, &p, (unsigned char *) end);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3442 mbp = p;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3443 trans = d->trans;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3444 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3445 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3446 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3447 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3448 s1 = t[*p++];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3449 t = trans[s1];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3450 if (! t)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3451 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3452 state_num tmp = s;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3453 s = s1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3454 s1 = tmp; /* swap */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3455 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3456 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3457 if (s < d->min_trcount)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3458 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3459 while (t[*p] == s1)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3460 p++;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3461 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3462 s = t[*p++];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3463 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3464 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3465
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3466 if (s < 0)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3467 {
18524
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3468 if (s == -2)
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3469 {
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3470 s = build_state (s1, d, p[-1]);
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3471 trans = d->trans;
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3472 }
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3473 else if ((char *) p <= end && p[-1] == eol && 0 <= d->newlines[s1])
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3474 {
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3475 /* The previous character was a newline. Count it, and skip
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3476 checking of multibyte character boundary until here. */
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3477 nlcount++;
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3478 mbp = p;
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3479
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3480 s = (allow_nl ? d->newlines[s1]
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3481 : d->syntax.sbit[eol] == CTX_NEWLINE ? 0
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3482 : d->syntax.sbit[eol] == CTX_LETTER ? d->min_trcount - 1
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3483 : d->initstate_notbol);
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3484 }
06c71a5ec1e9 dfa: fix glitches with on-demand states
Paul Eggert <eggert@cs.ucla.edu>
parents: 18523
diff changeset
3485 else
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3486 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3487 p = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3488 goto done;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3489 }
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3490 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3491 else if (d->fails[s])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3492 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3493 if ((d->success[s] & d->syntax.sbit[*p])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3494 || ((char *) p == end
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
3495 && accepts_in_context (d->states[s].context, CTX_NEWLINE, s,
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
3496 d)))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3497 goto done;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3498
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3499 if (multibyte && s < d->min_trcount)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3500 p = mbp = skip_remains_mb (d, p, mbp, end);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3501
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3502 s1 = s;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3503 if (!multibyte || d->states[s].mbps.nelem == 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3504 || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3505 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3506 /* If an input character does not match ANYCHAR, treat it
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3507 like a single-byte character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3508 s = d->fails[s][*p++];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3509 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3510 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3511 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3512 s = transit_state (d, s, &p, (unsigned char *) end);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3513 mbp = p;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3514 trans = d->trans;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3515 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3516 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3517 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3518 {
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3519 build_state (s, d, p[0]);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3520 trans = d->trans;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3521 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3522 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3523
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3524 done:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3525 if (count)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3526 *count += nlcount;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3527 *end = saved_end;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3528 return (char *) p;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3529 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3530
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3531 /* Specialized versions of dfaexec for multibyte and single-byte cases.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3532 This is for performance, as dfaexec_main is an inline function. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3533
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Multibyte specialization of dfaexec_main: forward D, BEGIN, END,
   ALLOW_NL and COUNT unchanged, with the final (multibyte) argument
   fixed to true, and return whatever dfaexec_main returns.  BACKREF is
   part of the signature shared by the dfaexec_* functions but is
   neither read nor written here.  */
static char *
dfaexec_mb (struct dfa *d, char const *begin, char *end,
            bool allow_nl, size_t *count, bool *backref)
{
  bool const multibyte = true;
  return dfaexec_main (d, begin, end, allow_nl, count, multibyte);
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3540
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Single-byte specialization of dfaexec_main: forward D, BEGIN, END,
   ALLOW_NL and COUNT unchanged, with the final (multibyte) argument
   fixed to false, and return whatever dfaexec_main returns.  BACKREF is
   part of the signature shared by the dfaexec_* functions but is
   neither read nor written here.  */
static char *
dfaexec_sb (struct dfa *d, char const *begin, char *end,
            bool allow_nl, size_t *count, bool *backref)
{
  bool const multibyte = false;
  return dfaexec_main (d, begin, end, allow_nl, count, multibyte);
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3547
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Fallback matcher for any regexp that uses a construct not supported
   by this code.  It performs no search at all: it unconditionally
   stores true in *BACKREF and returns BEGIN (with its const qualifier
   cast away).  D, END, ALLOW_NL and COUNT are ignored; in particular
   *COUNT is left untouched.  */
static char *
dfaexec_noop (struct dfa *d, char const *begin, char *end,
              bool allow_nl, size_t *count, bool *backref)
{
  char *unscanned = (char *) begin;

  *backref = true;
  return unscanned;
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3557
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3558 /* Like dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, D->localeinfo.multibyte),
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3559 but faster and set *BACKREF if the DFA code does not support this
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3560 regexp usage. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3561
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3562 char *
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3563 dfaexec (struct dfa *d, char const *begin, char *end,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3564 bool allow_nl, size_t *count, bool *backref)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3565 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3566 return d->dfaexec (d, begin, end, allow_nl, count, backref);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3567 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3568
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3569 struct dfa *
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3570 dfasuperset (struct dfa const *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3571 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3572 return d->superset;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3573 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3574
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3575 bool
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3576 dfaisfast (struct dfa const *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3577 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3578 return d->fast;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3579 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3580
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3581 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3582 free_mbdata (struct dfa *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3583 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3584 free (d->multibyte_prop);
18620
1c30554fd1dc dfa: simplify multibyte_prop etc.
Paul Eggert <eggert@cs.ucla.edu>
parents: 18619
diff changeset
3585 free (d->lex.brack.chars);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3586 free (d->mb_follows.elems);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3587
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3588 if (d->mb_trans)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3589 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3590 state_num s;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3591 for (s = -1; s < d->tralloc; s++)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3592 free (d->mb_trans[s]);
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3593 free (d->mb_trans - 2);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3594 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3595 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3596
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3597 /* Return true if every construct in D is supported by this DFA matcher. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3598 static bool _GL_ATTRIBUTE_PURE
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3599 dfa_supported (struct dfa const *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3600 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3601 for (size_t i = 0; i < d->tindex; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3602 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3603 switch (d->tokens[i])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3604 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3605 case BEGWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3606 case ENDWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3607 case LIMWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3608 case NOTLIMWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3609 if (!d->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3610 continue;
18914
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
3611 FALLTHROUGH;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3612 case BACKREF:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3613 case MBCSET:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3614 return false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3615 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3616 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3617 return true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3618 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3619
39861
5d7b30167723 dfa: trivial comment fix: s/is/if/
Jim Meyering <meyering@fb.com>
parents: 39860
diff changeset
3620 /* Disable use of the superset DFA if it is not likely to help
39860
fd9996b911ad dfa: use more-informative function name
Paul Eggert <eggert@cs.ucla.edu>
parents: 39859
diff changeset
3621 performance. */
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3622 static void
39860
fd9996b911ad dfa: use more-informative function name
Paul Eggert <eggert@cs.ucla.edu>
parents: 39859
diff changeset
3623 maybe_disable_superset_dfa (struct dfa *d)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3624 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3625 if (!d->localeinfo.using_utf8)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3626 return;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3627
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3628 bool have_backref = false;
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3629 for (size_t i = 0; i < d->tindex; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3630 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3631 switch (d->tokens[i])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3632 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3633 case ANYCHAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3634 /* Lowered. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3635 abort ();
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3636 case BACKREF:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3637 have_backref = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3638 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3639 case MBCSET:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3640 /* Requires multi-byte algorithm. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3641 return;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3642 default:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3643 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3644 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3645 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3646
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3647 if (!have_backref && d->superset)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3648 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3649 /* The superset DFA is not likely to be much faster, so remove it. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3650 dfafree (d->superset);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3651 free (d->superset);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3652 d->superset = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3653 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3654
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3655 free_mbdata (d);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3656 d->localeinfo.multibyte = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3657 d->dfaexec = dfaexec_sb;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3658 d->fast = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3659 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3660
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3661 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3662 dfassbuild (struct dfa *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3663 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3664 struct dfa *sup = dfaalloc ();
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3665
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3666 *sup = *d;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3667 sup->localeinfo.multibyte = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3668 sup->dfaexec = dfaexec_sb;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3669 sup->multibyte_prop = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3670 sup->superset = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3671 sup->states = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3672 sup->sindex = 0;
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3673 sup->constraints = NULL;
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3674 sup->separates = NULL;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3675 sup->follows = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3676 sup->tralloc = 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3677 sup->trans = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3678 sup->fails = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3679 sup->success = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3680 sup->newlines = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3681
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3682 sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3683 if (d->cindex)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3684 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3685 memcpy (sup->charclasses, d->charclasses,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3686 d->cindex * sizeof *sup->charclasses);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3687 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3688
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3689 sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3690 sup->talloc = d->tindex * 2;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3691
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3692 bool have_achar = false;
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3693 bool have_nchar = false;
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3694 size_t j;
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3695 for (size_t i = j = 0; i < d->tindex; i++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3696 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3697 switch (d->tokens[i])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3698 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3699 case ANYCHAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3700 case MBCSET:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3701 case BACKREF:
18525
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
3702 {
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
3703 charclass ccl;
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3704 fillset (&ccl);
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
3705 sup->tokens[j++] = CSET + charclass_index (sup, &ccl);
18525
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
3706 sup->tokens[j++] = STAR;
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
3707 if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
3708 || d->tokens[i + 1] == PLUS)
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
3709 i++;
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
3710 have_achar = true;
1545248f9c57 dfa: simplify with new function fillset
Paul Eggert <eggert@cs.ucla.edu>
parents: 18524
diff changeset
3711 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3712 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3713 case BEGWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3714 case ENDWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3715 case LIMWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3716 case NOTLIMWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3717 if (d->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3718 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3719 /* These constraints aren't supported in a multibyte locale.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3720 Ignore them in the superset DFA. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3721 sup->tokens[j++] = EMPTY;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3722 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3723 }
18914
886945d1fa95 manywarnings: update for GCC 7
Paul Eggert <eggert@cs.ucla.edu>
parents: 18752
diff changeset
3724 FALLTHROUGH;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3725 default:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3726 sup->tokens[j++] = d->tokens[i];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3727 if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3728 || d->tokens[i] >= CSET)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3729 have_nchar = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3730 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3731 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3732 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3733 sup->tindex = j;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3734
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3735 if (have_nchar && (have_achar || d->localeinfo.multibyte))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3736 d->superset = sup;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3737 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3738 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3739 dfafree (sup);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3740 free (sup);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3741 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3742 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3743
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3744 /* Parse and analyze a single string of the given length. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3745 void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3746 dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3747 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3748 dfaparse (s, len, d);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3749 dfassbuild (d);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3750
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3751 if (dfa_supported (d))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3752 {
39860
fd9996b911ad dfa: use more-informative function name
Paul Eggert <eggert@cs.ucla.edu>
parents: 39859
diff changeset
3753 maybe_disable_superset_dfa (d);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3754 dfaanalyze (d, searchflag);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3755 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3756 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3757 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3758 d->dfaexec = dfaexec_noop;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3759 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3760
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3761 if (d->superset)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3762 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3763 d->fast = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3764 dfaanalyze (d->superset, searchflag);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3765 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3766 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3767
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3768 /* Free the storage held by the components of a dfa. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3769 void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3770 dfafree (struct dfa *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3771 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3772 free (d->charclasses);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3773 free (d->tokens);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3774
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3775 if (d->localeinfo.multibyte)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3776 free_mbdata (d);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3777
39956
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3778 free (d->constraints);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3779 free (d->separates);
4fee19f467e5 dfa: a state has a set of current positions.
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 39955
diff changeset
3780
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3781 for (size_t i = 0; i < d->sindex; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3782 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3783 free (d->states[i].elems.elems);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3784 free (d->states[i].mbps.elems);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3785 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3786 free (d->states);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3787
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3788 if (d->follows)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3789 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3790 for (size_t i = 0; i < d->tindex; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3791 free (d->follows[i].elems);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3792 free (d->follows);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3793 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3794
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3795 if (d->trans)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3796 {
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
3797 for (size_t i = 0; i < d->tralloc; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3798 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3799 free (d->trans[i]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3800 free (d->fails[i]);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3801 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3802
18523
503cb4e4af32 dfa: addition of new state on demand
Norihiro Tanaka <noritnk@kcn.ne.jp>
parents: 18519
diff changeset
3803 free (d->trans - 2);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3804 free (d->fails);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3805 free (d->newlines);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3806 free (d->success);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3807 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3808
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3809 if (d->superset)
39760
9e30fb88528f dfa: fix memory leak
Assaf Gordon <assafgordon@gmail.com>
parents: 39722
diff changeset
3810 {
9e30fb88528f dfa: fix memory leak
Assaf Gordon <assafgordon@gmail.com>
parents: 39722
diff changeset
3811 dfafree (d->superset);
9e30fb88528f dfa: fix memory leak
Assaf Gordon <assafgordon@gmail.com>
parents: 39722
diff changeset
3812 free (d->superset);
9e30fb88528f dfa: fix memory leak
Assaf Gordon <assafgordon@gmail.com>
parents: 39722
diff changeset
3813 }
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3814 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3815
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3816 /* Having found the postfix representation of the regular expression,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3817 try to find a long sequence of characters that must appear in any line
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3818 containing the r.e.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3819 Finding a "longest" sequence is beyond the scope here;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3820 we take an easy way out and hope for the best.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3821 (Take "(ab|a)b"--please.)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3822
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3823 We do a bottom-up calculation of sequences of characters that must appear
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3824 in matches of r.e.'s represented by trees rooted at the nodes of the postfix
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3825 representation:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3826 sequences that must appear at the left of the match ("left")
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3827 sequences that must appear at the right of the match ("right")
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3828 lists of sequences that must appear somewhere in the match ("in")
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3829 sequences that must constitute the match ("is")
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3830
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3831 When we get to the root of the tree, we use one of the longest of its
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3832 calculated "in" sequences as our answer.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3833
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3834 The sequences calculated for the various types of node (in pseudo ANSI c)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3835 are shown below. "p" is the operand of unary operators (and the left-hand
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3836 operand of binary operators); "q" is the right-hand operand of binary
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3837 operators.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3838
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3839 "ZERO" means "a zero-length sequence" below.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3840
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3841 Type left right is in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3842 ---- ---- ----- -- --
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3843 char c # c # c # c # c
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3844
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3845 ANYCHAR ZERO ZERO ZERO ZERO
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3846
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3847 MBCSET ZERO ZERO ZERO ZERO
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3848
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3849 CSET ZERO ZERO ZERO ZERO
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3850
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3851 STAR ZERO ZERO ZERO ZERO
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3852
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3853 QMARK ZERO ZERO ZERO ZERO
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3854
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3855 PLUS p->left p->right ZERO p->in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3856
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3857 CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3858 p->left : q->right : q->is!=ZERO) ? q->in plus
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3859 p->is##q->left p->right##q->is p->is##q->is : p->right##q->left
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3860 ZERO
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3861
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3862 OR longest common longest common (do p->is and substrings common
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3863 leading trailing to q->is have same p->in and
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3864 (sub)sequence (sub)sequence q->in length and content) ?
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3865 of p->left of p->right
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3866 and q->left and q->right p->is : NULL
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3867
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3868 If there's anything else we recognize in the tree, all four sequences get set
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3869 to zero-length sequences. If there's something we don't recognize in the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3870 tree, we just return a zero-length sequence.
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3871
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3872 Break ties in favor of infrequent letters (choosing 'zzz' in preference to
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3873 'aaa')?
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3874
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3875 And ... is it here or someplace that we might ponder "optimizations" such as
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3876 egrep 'psi|epsilon' -> egrep 'psi'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3877 egrep 'pepsi|epsilon' -> egrep 'epsi'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3878 (Yes, we now find "epsi" as a "string
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3879 that must occur", but we might also
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3880 simplify the *entire* r.e. being sought)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3881 grep '[c]' -> grep 'c'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3882 grep '(ab|a)b' -> grep 'ab'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3883 grep 'ab*' -> grep 'a'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3884 grep 'a*b' -> grep 'b'
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3885
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3886 There are several issues:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3887
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3888 Is optimization easy (enough)?
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3889
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3890 Does optimization actually accomplish anything,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3891 or is the automaton you get from "psi|epsilon" (for example)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3892 the same as the one you get from "psi" (for example)?
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3893
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3894 Are optimizable r.e.'s likely to be used in real-life situations
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3895 (something like 'ab*' is probably unlikely; something like
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3896 'psi|epsilon' is likelier)? */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3897
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Append the string NEW to the string OLD, enlarging OLD with xrealloc,
   and return the resulting string.  If NEW is empty, OLD is returned
   as is, without being reallocated.  */
static char *
icatalloc (char *old, char const *new)
{
  size_t addlen = strlen (new);
  if (addlen != 0)
    {
      size_t baselen = strlen (old);
      old = xrealloc (old, baselen + addlen + 1);
      /* The "+ 1" carries NEW's terminating NUL across as well.  */
      memcpy (old + baselen, new, addlen + 1);
    }
  return old;
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3909
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Free each string in the NULL-terminated array CPP.  The array itself
   is not freed and its slots are not modified.  */
static void
freelist (char **cpp)
{
  for (size_t k = 0; cpp[k] != NULL; k++)
    free (cpp[k]);
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3916
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Add a NUL-terminated copy of the first LEN bytes of NEW to the
   NULL-terminated string list CPP and return the list, which may have
   been moved by reallocation.  If some entry already contains the copy
   as a substring, the list is returned unchanged.  Otherwise every
   entry that is itself a substring of the copy is freed and removed
   before the copy is appended.  */
static char **
enlist (char **cpp, char *new, size_t len)
{
  char *copy = xmalloc (len + 1);
  memcpy (copy, new, len);
  copy[len] = '\0';

  /* Count the entries, stopping early if one already covers COPY.  */
  size_t count = 0;
  while (cpp[count] != NULL)
    {
      if (strstr (cpp[count], copy) != NULL)
        {
          free (copy);
          return cpp;
        }
      count++;
    }

  /* Discard entries that COPY makes redundant.  Each hole is filled
     with the current last entry, so the slot is examined again.  */
  size_t k = 0;
  while (cpp[k] != NULL)
    {
      if (strstr (copy, cpp[k]) == NULL)
        {
          k++;
          continue;
        }

      free (cpp[k]);
      count--;
      if (count == k)
        break;          /* The freed entry was the last one.  */
      cpp[k] = cpp[count];
      cpp[count] = NULL;
    }

  /* Room for the surviving entries, COPY, and the terminator.  */
  cpp = xnrealloc (cpp, count + 2, sizeof *cpp);
  cpp[count] = copy;
  cpp[count + 1] = NULL;
  return cpp;
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3948
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Return a newly allocated, NULL-terminated list of the distinct
   substrings that LEFT and RIGHT have in common.  For every starting
   position in LEFT, the longest run that also occurs somewhere in
   RIGHT is passed to enlist, which maintains the list.  */
static char **
comsubs (char *left, char const *right)
{
  /* A single zeroed slot serves as the empty list (xzalloc is
     presumably the zero-filling allocator).  */
  char **list = xzalloc (sizeof *list);

  for (char *start = left; *start != '\0'; start++)
    {
      size_t best = 0;

      /* Visit each occurrence of *START in RIGHT and remember the
         longest match beginning there.  */
      for (char *hit = strchr (right, *start); hit != NULL;
           hit = strchr (hit + 1, *start))
        {
          size_t run = 1;
          while (start[run] != '\0' && start[run] == hit[run])
            run++;
          if (best < run)
            best = run;
        }

      /* BEST stays zero only when *START never occurs in RIGHT.  */
      if (best != 0)
        list = enlist (list, start, best);
    }

  return list;
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3974
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Pass each string of the NULL-terminated list NEW, together with its
   length, to enlist so that it is merged into OLD.  Return the
   resulting list, which enlist may have reallocated.  */
static char **
addlists (char **old, char **new)
{
  while (*new != NULL)
    {
      old = enlist (old, *new, strlen (*new));
      new++;
    }
  return old;
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
3982
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* Return a newly allocated list of the substrings common to the two
   NULL-terminated string lists LEFT and RIGHT.  Every pairing of one
   string from each list is run through comsubs, and the per-pair
   results are merged into the answer with addlists.  */
static char **
inboth (char **left, char **right)
{
  char **common = xzalloc (sizeof *common);

  for (char **lp = left; *lp != NULL; lp++)
    for (char **rp = right; *rp != NULL; rp++)
      {
        char **pairwise = comsubs (*lp, *rp);
        common = addlists (common, pairwise);

        /* The per-pair list is no longer needed: freelist handles its
           members (presumably freeing each string), and the array
           itself is released separately.  */
        freelist (pairwise);
        free (pairwise);
      }

  return common;
}
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4002
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
/* A record describing one subexpression while dfamust scans the
   token array.  Records are chained through PREV so that they can be
   used as a stack.  */
struct must
{
  char **in;            /* NULL-terminated list of strings; merged with
                           addlists/inboth by dfamust.  */
  char *left;           /* String buffer; dfamust trims it to a common
                           prefix when handling OR.  */
  char *right;          /* String buffer; dfamust trims it to a common
                           suffix when handling OR.  */
  char *is;             /* String buffer; emptied when the operands of
                           OR differ, and by PLUS.  */
  bool begline;         /* Set for a BEGLINE token.  */
  bool endline;         /* Set for an ENDLINE token.  */
  struct must *prev;    /* Next-older record on the stack, or NULL.  */
};

typedef struct must must;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4015
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4016 static must *
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4017 allocmust (must *mp, size_t size)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4018 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4019 must *new_mp = xmalloc (sizeof *new_mp);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4020 new_mp->in = xzalloc (sizeof *new_mp->in);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4021 new_mp->left = xzalloc (size);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4022 new_mp->right = xzalloc (size);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4023 new_mp->is = xzalloc (size);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4024 new_mp->begline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4025 new_mp->endline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4026 new_mp->prev = mp;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4027 return new_mp;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4028 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4029
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4030 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4031 resetmust (must *mp)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4032 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4033 freelist (mp->in);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4034 mp->in[0] = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4035 mp->left[0] = mp->right[0] = mp->is[0] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4036 mp->begline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4037 mp->endline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4038 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4039
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4040 static void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4041 freemust (must *mp)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4042 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4043 freelist (mp->in);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4044 free (mp->in);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4045 free (mp->left);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4046 free (mp->right);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4047 free (mp->is);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4048 free (mp);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4049 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4050
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4051 struct dfamust *
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4052 dfamust (struct dfa const *d)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4053 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4054 must *mp = NULL;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4055 char const *result = "";
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4056 bool exact = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4057 bool begline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4058 bool endline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4059 bool need_begline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4060 bool need_endline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4061 bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4062
39859
1f2a63e46815 dfa: tweak allocation performance
Paul Eggert <eggert@cs.ucla.edu>
parents: 39858
diff changeset
4063 for (size_t ri = 1; ri + 1 < d->tindex; ri++)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4064 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4065 token t = d->tokens[ri];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4066 switch (t)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4067 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4068 case BEGLINE:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4069 mp = allocmust (mp, 2);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4070 mp->begline = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4071 need_begline = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4072 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4073 case ENDLINE:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4074 mp = allocmust (mp, 2);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4075 mp->endline = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4076 need_endline = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4077 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4078 case LPAREN:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4079 case RPAREN:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4080 assert (!"neither LPAREN nor RPAREN may appear here");
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4081
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4082 case EMPTY:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4083 case BEGWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4084 case ENDWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4085 case LIMWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4086 case NOTLIMWORD:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4087 case BACKREF:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4088 case ANYCHAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4089 case MBCSET:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4090 mp = allocmust (mp, 2);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4091 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4092
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4093 case STAR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4094 case QMARK:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4095 resetmust (mp);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4096 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4097
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4098 case OR:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4099 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4100 char **new;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4101 must *rmp = mp;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4102 must *lmp = mp = mp->prev;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4103 size_t j, ln, rn, n;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4104
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4105 /* Guaranteed to be. Unlikely, but ... */
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
4106 if (streq (lmp->is, rmp->is))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4107 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4108 lmp->begline &= rmp->begline;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4109 lmp->endline &= rmp->endline;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4110 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4111 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4112 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4113 lmp->is[0] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4114 lmp->begline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4115 lmp->endline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4116 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4117 /* Left side--easy */
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
4118 size_t i = 0;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4119 while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4120 ++i;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4121 lmp->left[i] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4122 /* Right side */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4123 ln = strlen (lmp->right);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4124 rn = strlen (rmp->right);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4125 n = ln;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4126 if (n > rn)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4127 n = rn;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4128 for (i = 0; i < n; ++i)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4129 if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4130 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4131 for (j = 0; j < i; ++j)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4132 lmp->right[j] = lmp->right[(ln - i) + j];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4133 lmp->right[j] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4134 new = inboth (lmp->in, rmp->in);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4135 freelist (lmp->in);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4136 free (lmp->in);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4137 lmp->in = new;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4138 freemust (rmp);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4139 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4140 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4141
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4142 case PLUS:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4143 mp->is[0] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4144 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4145
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4146 case END:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4147 assert (!mp->prev);
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
4148 for (size_t i = 0; mp->in[i] != NULL; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4149 if (strlen (mp->in[i]) > strlen (result))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4150 result = mp->in[i];
18630
3974d9d184ab dfa: prefer functions and constants to macros
Paul Eggert <eggert@cs.ucla.edu>
parents: 18629
diff changeset
4151 if (streq (result, mp->is))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4152 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4153 if ((!need_begline || mp->begline) && (!need_endline
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4154 || mp->endline))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4155 exact = true;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4156 begline = mp->begline;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4157 endline = mp->endline;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4158 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4159 goto done;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4160
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4161 case CAT:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4162 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4163 must *rmp = mp;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4164 must *lmp = mp = mp->prev;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4165
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4166 /* In. Everything in left, plus everything in
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4167 right, plus concatenation of
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4168 left's right and right's left. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4169 lmp->in = addlists (lmp->in, rmp->in);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4170 if (lmp->right[0] != '\0' && rmp->left[0] != '\0')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4171 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4172 size_t lrlen = strlen (lmp->right);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4173 size_t rllen = strlen (rmp->left);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4174 char *tp = xmalloc (lrlen + rllen);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4175 memcpy (tp, lmp->right, lrlen);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4176 memcpy (tp + lrlen, rmp->left, rllen);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4177 lmp->in = enlist (lmp->in, tp, lrlen + rllen);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4178 free (tp);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4179 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4180 /* Left-hand */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4181 if (lmp->is[0] != '\0')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4182 lmp->left = icatalloc (lmp->left, rmp->left);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4183 /* Right-hand */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4184 if (rmp->is[0] == '\0')
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4185 lmp->right[0] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4186 lmp->right = icatalloc (lmp->right, rmp->right);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4187 /* Guaranteed to be */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4188 if ((lmp->is[0] != '\0' || lmp->begline)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4189 && (rmp->is[0] != '\0' || rmp->endline))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4190 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4191 lmp->is = icatalloc (lmp->is, rmp->is);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4192 lmp->endline = rmp->endline;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4193 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4194 else
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4195 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4196 lmp->is[0] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4197 lmp->begline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4198 lmp->endline = false;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4199 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4200 freemust (rmp);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4201 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4202 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4203
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4204 case '\0':
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4205 /* Not on *my* shift. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4206 goto done;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4207
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4208 default:
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4209 if (CSET <= t)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4210 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4211 /* If T is a singleton, or if case-folding in a unibyte
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4212 locale and T's members all case-fold to the same char,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4213 convert T to one of its members. Otherwise, do
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4214 nothing further with T. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4215 charclass *ccl = &d->charclasses[t - CSET];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4216 int j;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4217 for (j = 0; j < NOTCHAR; j++)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
4218 if (tstbit (j, ccl))
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4219 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4220 if (! (j < NOTCHAR))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4221 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4222 mp = allocmust (mp, 2);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4223 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4224 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4225 t = j;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4226 while (++j < NOTCHAR)
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
4227 if (tstbit (j, ccl)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4228 && ! (case_fold_unibyte
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4229 && toupper (j) == toupper (t)))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4230 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4231 if (j < NOTCHAR)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4232 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4233 mp = allocmust (mp, 2);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4234 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4235 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4236 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4237
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
4238 size_t rj = ri + 2;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4239 if (d->tokens[ri + 1] == CAT)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4240 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4241 for (; rj < d->tindex - 1; rj += 2)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4242 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4243 if ((rj != ri && (d->tokens[rj] <= 0
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4244 || NOTCHAR <= d->tokens[rj]))
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4245 || d->tokens[rj + 1] != CAT)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4246 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4247 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4248 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4249 mp = allocmust (mp, ((rj - ri) >> 1) + 1);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4250 mp->is[0] = mp->left[0] = mp->right[0]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4251 = case_fold_unibyte ? toupper (t) : t;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4252
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
4253 size_t i;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4254 for (i = 1; ri + 2 < rj; i++)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4255 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4256 ri += 2;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4257 t = d->tokens[ri];
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4258 mp->is[i] = mp->left[i] = mp->right[i]
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4259 = case_fold_unibyte ? toupper (t) : t;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4260 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4261 mp->is[i] = mp->left[i] = mp->right[i] = '\0';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4262 mp->in = enlist (mp->in, mp->is, i);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4263 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4264 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4265 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4266 done:;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4267
18629
32aa1933afb8 dfa: narrow more local var scopes
Paul Eggert <eggert@cs.ucla.edu>
parents: 18628
diff changeset
4268 struct dfamust *dm = NULL;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4269 if (*result)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4270 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4271 dm = xmalloc (sizeof *dm);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4272 dm->exact = exact;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4273 dm->begline = begline;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4274 dm->endline = endline;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4275 dm->must = xstrdup (result);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4276 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4277
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4278 while (mp)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4279 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4280 must *prev = mp->prev;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4281 freemust (mp);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4282 mp = prev;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4283 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4284
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4285 return dm;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4286 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4287
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4288 void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4289 dfamustfree (struct dfamust *dm)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4290 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4291 free (dm->must);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4292 free (dm);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4293 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4294
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4295 struct dfa *
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4296 dfaalloc (void)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4297 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4298 return xmalloc (sizeof (struct dfa));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4299 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4300
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4301 /* Initialize DFA. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4302 void
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4303 dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4304 reg_syntax_t bits, int dfaopts)
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4305 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4306 memset (dfa, 0, offsetof (struct dfa, dfaexec));
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4307 dfa->dfaexec = linfo->multibyte ? dfaexec_mb : dfaexec_sb;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4308 dfa->simple_locale = using_simple_locale (linfo->multibyte);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4309 dfa->localeinfo = *linfo;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4310
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4311 dfa->fast = !dfa->localeinfo.multibyte;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4312
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4313 dfa->canychar = -1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4314 dfa->lex.cur_mb_len = 1;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4315 dfa->syntax.syntax_bits_set = true;
18557
34cdc221276c dfa: remove DFA_CASE_FOLD flag in favor of RE_ICASE
Paul Eggert <eggert@cs.ucla.edu>
parents: 18556
diff changeset
4316 dfa->syntax.case_fold = (bits & RE_ICASE) != 0;
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4317 dfa->syntax.anchor = (dfaopts & DFA_ANCHOR) != 0;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4318 dfa->syntax.eolbyte = dfaopts & DFA_EOL_NUL ? '\0' : '\n';
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4319 dfa->syntax.syntax_bits = bits;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4320
18628
dbd0afa797c5 dfa: narrow the scope of many local variables
Jim Meyering <meyering@fb.com>
parents: 18626
diff changeset
4321 for (int i = CHAR_MIN; i <= CHAR_MAX; ++i)
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4322 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4323 unsigned char uc = i;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4324
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4325 dfa->syntax.sbit[uc] = char_context (dfa, uc);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4326 switch (dfa->syntax.sbit[uc])
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4327 {
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4328 case CTX_LETTER:
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
4329 setbit (uc, &dfa->syntax.letters);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4330 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4331 case CTX_NEWLINE:
18618
500f7d1fe5a2 dfa: wrap charclass inside a struct
Paul Eggert <eggert@cs.ucla.edu>
parents: 18608
diff changeset
4332 setbit (uc, &dfa->syntax.newline);
18410
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4333 break;
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4334 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4335
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4336 /* POSIX requires that the five bytes in "\n\r./" (including the
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4337 terminating NUL) cannot occur inside a multibyte character. */
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4338 dfa->syntax.never_trail[uc] = (dfa->localeinfo.using_utf8
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4339 ? (uc & 0xc0) != 0x80
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4340 : strchr ("\n\r./", uc) != NULL);
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4341 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4342 }
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4343
a8d2b9364721 dfa: new module, importing grep's DFA matcher
Jim Meyering <meyering@fb.com>
parents:
diff changeset
4344 /* vim:set shiftwidth=2: */