changeset 39859:1f2a63e46815

dfa: tweak allocation performance * lib/dfa.c (merge_nfa_state, dfaoptimize): Prefer ptrdiff_t for indexes some more. Use char for flags, as it’s wide enough. Allocate queue and flags together, with one malloc call. No need to use xnmalloc since the multiplication and addition cannot overflow (it’s already been checked by earlier allocation). Prefer memset to open-coding.
author Paul Eggert <eggert@cs.ucla.edu>
date Tue, 18 Sep 2018 19:05:26 -0700
parents f1a9693c37be
children fd9996b911ad
files ChangeLog lib/dfa.c
diffstat 2 files changed, 27 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Tue Sep 18 18:24:27 2018 -0700
+++ b/ChangeLog	Tue Sep 18 19:05:26 2018 -0700
@@ -1,5 +1,14 @@
 2018-09-18  Paul Eggert  <eggert@cs.ucla.edu>
 
+	dfa: tweak allocation performance
+	* lib/dfa.c (merge_nfa_state, dfaoptimize):
+	Prefer ptrdiff_t for indexes some more.
+	Use char for flags, as it’s wide enough.
+	Allocate queue and flags together, with one malloc call.
+	No need to use xnmalloc since the multiplication and
+	addition cannot overflow (it’s already been checked by
+	earlier allocation).  Prefer memset to open-coding.
+
 	dfa: prune states as we go
 	* lib/dfa.c (prune): Remove.
 	dfa: reorder enum for efficiency
--- a/lib/dfa.c	Tue Sep 18 18:24:27 2018 -0700
+++ b/lib/dfa.c	Tue Sep 18 19:05:26 2018 -0700
@@ -2342,9 +2342,9 @@
   OPT_QUEUED = (1 << 4)
 };
 
-static size_t
-merge_nfa_state (struct dfa *d, size_t *queue, size_t nqueue, size_t tindex,
-                 int *flags, position_set *merged)
+static ptrdiff_t
+merge_nfa_state (struct dfa *d, size_t *queue, ptrdiff_t nqueue, size_t tindex,
+                 char *flags, position_set *merged)
 {
   position_set *follows = d->follows;
   ptrdiff_t nelem = 0;
@@ -2369,7 +2369,7 @@
       if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN))
         continue;
 
-      for (size_t j = i + 1; j < follows[tindex].nelem; j++)
+      for (ptrdiff_t j = i + 1; j < follows[tindex].nelem; j++)
         {
           size_t sindex = follows[tindex].elems[j].index;
 
@@ -2404,28 +2404,27 @@
 static void
 dfaoptimize (struct dfa *d)
 {
-  int *flags;
+  char *flags;
   size_t *queue;
-  size_t nqueue;
+  ptrdiff_t nqueue;
   position_set merged0;
   position_set *merged;
-
-  flags = xnmalloc (d->tindex, sizeof *flags);
-  queue = xnmalloc (d->nleaves, sizeof *queue);
-
-  for (size_t i = 0; i < d->tindex; ++i)
-    flags[i] = 0;
-
-  for (size_t i = 0; i < d->tindex; ++i)
+  ptrdiff_t extra_space = d->tindex + sizeof *queue - 1;
+  extra_space -= extra_space % sizeof *queue;
+
+  queue = xmalloc (d->nleaves * sizeof *queue + extra_space);
+  flags = (char *) (queue + d->nleaves);
+  memset (flags, 0, d->tindex * sizeof *flags);
+
+  for (size_t i = 0; i < d->tindex; i++)
     {
-      for (size_t j = 0; j < d->follows[i].nelem; j++)
+      for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++)
         {
           if (d->follows[i].elems[j].index == i)
             flags[d->follows[i].elems[j].index] |= OPT_REPEAT;
           else if (d->follows[i].elems[j].index < i)
             flags[d->follows[i].elems[j].index] |= OPT_LPAREN;
-          else if (flags[d->follows[i].elems[j].index] &=
-                   OPT_WALKED)
+          else if (flags[d->follows[i].elems[j].index] &= OPT_WALKED)
             flags[d->follows[i].elems[j].index] |= OPT_RPAREN;
           else
             flags[d->follows[i].elems[j].index] |= OPT_WALKED;
@@ -2438,12 +2437,11 @@
   nqueue = 0;
   queue[nqueue++] = 0;
 
-  for (size_t i = 0; i < nqueue; i++)
+  for (ptrdiff_t i = 0; i < nqueue; i++)
     nqueue = merge_nfa_state (d, queue, nqueue, queue[i], flags, merged);
 
   free (merged->elems);
   free (queue);
-  free (flags);
 }
 
 /* Perform bottom-up analysis on the parse tree, computing various functions.
@@ -3921,7 +3919,7 @@
   bool need_endline = false;
   bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
 
-  for (size_t ri = 1; ri < d->tindex - 1; ++ri)
+  for (size_t ri = 1; ri + 1 < d->tindex; ri++)
     {
       token t = d->tokens[ri];
       switch (t)