changeset 40047:183a2f6b0b16

revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7. v0.1-2213-gae4b73e28 caused a regression in grep-3.2 (no match): echo '123-x'|LC_ALL=C grep -E '.\bx' The goal is to revert the first, but reverting it requires restoring the function deleted in the second. I ran this to restore the deleted function: git show v0.1-2281-g95cd86dd7 lib/dfa.c \ | perl -0777 -pe 's/^@@[^\n]*dfaan.*//ms' \ | patch -R -p1 * lib/dfa.c (charclass_context): Restore deleted function. Reverting the primary commit removes this change: dfa: Simplify a building state * lib/dfa.c (build_state): Simplify a building state.
author Jim Meyering <meyering@fb.com>
date Thu, 20 Dec 2018 19:51:48 -0800
parents cc26384787bc
children 59dbafac4d42
files ChangeLog lib/dfa.c
diffstat 2 files changed, 49 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Thu Dec 20 16:10:29 2018 -0800
+++ b/ChangeLog	Thu Dec 20 19:51:48 2018 -0800
@@ -1,3 +1,19 @@
+2018-12-20  Jim Meyering  <meyering@fb.com>
+
+	revert v0.1-2213-gae4b73e28 and part of v0.1-2281-g95cd86dd7
+	v0.1-2213-gae4b73e28 caused a regression in grep-3.2 (no match):
+	  echo '123-x'|LC_ALL=C grep -E '.\bx'
+	The goal is to revert the first, but reverting it requires restoring
+	the function deleted in the second. I ran this to restore the deleted
+	function:
+	  git show v0.1-2281-g95cd86dd7 lib/dfa.c \
+	    | perl -0777 -pe 's/^@@[^\n]*dfaan.*//ms' \
+	    | patch -R -p1
+	* lib/dfa.c (charclass_context): Restore deleted function.
+	Reverting the primary commit removes this change:
+	dfa: Simplify a building state
+	* lib/dfa.c (build_state): Simplify a building state.
+
 2018-12-20  Paul Eggert  <eggert@cs.ucla.edu>
 
 	version-etc: allow zero authors
--- a/lib/dfa.c	Thu Dec 20 16:10:29 2018 -0800
+++ b/lib/dfa.c	Thu Dec 20 19:51:48 2018 -0800
@@ -2300,6 +2300,27 @@
   free (tmp.elems);
 }
 
+/* Return the bitwise OR of the contexts (CTX_NEWLINE, CTX_LETTER, CTX_NONE)
+   for which C has a character, per DFA->syntax's newline and letters sets.  */
+
+static int
+charclass_context (struct dfa const *dfa, charclass const *c)
+{
+  int context = 0;
+
+  for (unsigned int j = 0; j < CHARCLASS_WORDS; ++j)
+    {
+      if (c->w[j] & dfa->syntax.newline.w[j])
+        context |= CTX_NEWLINE;
+      if (c->w[j] & dfa->syntax.letters.w[j])
+        context |= CTX_LETTER;
+      if (c->w[j] & ~(dfa->syntax.letters.w[j] | dfa->syntax.newline.w[j]))
+        context |= CTX_NONE;
+    }
+
+  return context;
+}
+
 /* Returns the contexts on which the position set S depends.  Each context
    in the set of returned contexts (let's call it SC) may have a different
    follow set than other contexts in SC, and also different from the
@@ -3091,18 +3112,22 @@
       /* Find out if the new state will want any context information,
          by calculating possible contexts that the group can match,
          and separate contexts that the new state wants to know.  */
+      int possible_contexts = charclass_context (d, &label);
       int separate_contexts = state_separate_contexts (d, &group);
 
       /* Find the state(s) corresponding to the union of the follows.  */
-      if (d->syntax.sbit[uc] & separate_contexts & CTX_NEWLINE)
-        state = state_index (d, &group, CTX_NEWLINE);
-      else if (d->syntax.sbit[uc] & separate_contexts & CTX_LETTER)
-        state = state_index (d, &group, CTX_LETTER);
+      if (possible_contexts & ~separate_contexts)
+        state = state_index (d, &group, separate_contexts ^ CTX_ANY);
       else
-        state = state_index (d, &group, separate_contexts ^ CTX_ANY);
-
-      state_newline = state;
-      state_letter = state;
+        state = -1;
+      if (separate_contexts & possible_contexts & CTX_NEWLINE)
+        state_newline = state_index (d, &group, CTX_NEWLINE);
+      else
+        state_newline = state;
+      if (separate_contexts & possible_contexts & CTX_LETTER)
+        state_letter = state_index (d, &group, CTX_LETTER);
+      else
+        state_letter = state;
 
       /* Reallocate now, to reallocate any newline transition properly.  */
       realloc_trans_if_necessary (d);