comparison lib/dfa.c @ 39859:1f2a63e46815

dfa: tweak allocation performance * lib/dfa.c (merge_nfa_state, dfaoptimize): Prefer ptrdiff_t for indexes some more. Use char for flags, as it’s wide enough. Allocate queue and flags together, with one malloc call. No need to use xnmalloc since the multiplication and addition cannot overflow (it’s already been checked by earlier allocation). Prefer memset to open-coding.
author Paul Eggert <eggert@cs.ucla.edu>
date Tue, 18 Sep 2018 19:05:26 -0700
parents f1a9693c37be
children 42b9c905d1bb
comparison
equal deleted inserted replaced
39858:f1a9693c37be 39859:1f2a63e46815
2340 2340
2341 /* The node is queued. The node is not queued again. */ 2341 /* The node is queued. The node is not queued again. */
2342 OPT_QUEUED = (1 << 4) 2342 OPT_QUEUED = (1 << 4)
2343 }; 2343 };
2344 2344
2345 static size_t 2345 static ptrdiff_t
2346 merge_nfa_state (struct dfa *d, size_t *queue, size_t nqueue, size_t tindex, 2346 merge_nfa_state (struct dfa *d, size_t *queue, ptrdiff_t nqueue, size_t tindex,
2347 int *flags, position_set *merged) 2347 char *flags, position_set *merged)
2348 { 2348 {
2349 position_set *follows = d->follows; 2349 position_set *follows = d->follows;
2350 ptrdiff_t nelem = 0; 2350 ptrdiff_t nelem = 0;
2351 2351
2352 for (ptrdiff_t i = 0; i < follows[tindex].nelem; i++) 2352 for (ptrdiff_t i = 0; i < follows[tindex].nelem; i++)
2367 } 2367 }
2368 2368
2369 if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN)) 2369 if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN))
2370 continue; 2370 continue;
2371 2371
2372 for (size_t j = i + 1; j < follows[tindex].nelem; j++) 2372 for (ptrdiff_t j = i + 1; j < follows[tindex].nelem; j++)
2373 { 2373 {
2374 size_t sindex = follows[tindex].elems[j].index; 2374 size_t sindex = follows[tindex].elems[j].index;
2375 2375
2376 if (follows[tindex].elems[j].constraint != iconstraint) 2376 if (follows[tindex].elems[j].constraint != iconstraint)
2377 continue; 2377 continue;
2402 } 2402 }
2403 2403
2404 static void 2404 static void
2405 dfaoptimize (struct dfa *d) 2405 dfaoptimize (struct dfa *d)
2406 { 2406 {
2407 int *flags; 2407 char *flags;
2408 size_t *queue; 2408 size_t *queue;
2409 size_t nqueue; 2409 ptrdiff_t nqueue;
2410 position_set merged0; 2410 position_set merged0;
2411 position_set *merged; 2411 position_set *merged;
2412 2412 ptrdiff_t extra_space = d->tindex + sizeof *queue - 1;
2413 flags = xnmalloc (d->tindex, sizeof *flags); 2413 extra_space -= extra_space % sizeof *queue;
2414 queue = xnmalloc (d->nleaves, sizeof *queue); 2414
2415 2415 queue = xmalloc (d->nleaves * sizeof *queue + extra_space);
2416 for (size_t i = 0; i < d->tindex; ++i) 2416 flags = (char *) (queue + d->nleaves);
2417 flags[i] = 0; 2417 memset (flags, 0, d->tindex * sizeof *flags);
2418 2418
2419 for (size_t i = 0; i < d->tindex; ++i) 2419 for (size_t i = 0; i < d->tindex; i++)
2420 { 2420 {
2421 for (size_t j = 0; j < d->follows[i].nelem; j++) 2421 for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++)
2422 { 2422 {
2423 if (d->follows[i].elems[j].index == i) 2423 if (d->follows[i].elems[j].index == i)
2424 flags[d->follows[i].elems[j].index] |= OPT_REPEAT; 2424 flags[d->follows[i].elems[j].index] |= OPT_REPEAT;
2425 else if (d->follows[i].elems[j].index < i) 2425 else if (d->follows[i].elems[j].index < i)
2426 flags[d->follows[i].elems[j].index] |= OPT_LPAREN; 2426 flags[d->follows[i].elems[j].index] |= OPT_LPAREN;
2427 else if (flags[d->follows[i].elems[j].index] &= 2427 else if (flags[d->follows[i].elems[j].index] &= OPT_WALKED)
2428 OPT_WALKED)
2429 flags[d->follows[i].elems[j].index] |= OPT_RPAREN; 2428 flags[d->follows[i].elems[j].index] |= OPT_RPAREN;
2430 else 2429 else
2431 flags[d->follows[i].elems[j].index] |= OPT_WALKED; 2430 flags[d->follows[i].elems[j].index] |= OPT_WALKED;
2432 } 2431 }
2433 } 2432 }
2436 alloc_position_set (merged, d->nleaves); 2435 alloc_position_set (merged, d->nleaves);
2437 2436
2438 nqueue = 0; 2437 nqueue = 0;
2439 queue[nqueue++] = 0; 2438 queue[nqueue++] = 0;
2440 2439
2441 for (size_t i = 0; i < nqueue; i++) 2440 for (ptrdiff_t i = 0; i < nqueue; i++)
2442 nqueue = merge_nfa_state (d, queue, nqueue, queue[i], flags, merged); 2441 nqueue = merge_nfa_state (d, queue, nqueue, queue[i], flags, merged);
2443 2442
2444 free (merged->elems); 2443 free (merged->elems);
2445 free (queue); 2444 free (queue);
2446 free (flags);
2447 } 2445 }
2448 2446
2449 /* Perform bottom-up analysis on the parse tree, computing various functions. 2447 /* Perform bottom-up analysis on the parse tree, computing various functions.
2450 Note that at this point, we're pretending constructs like \< are real 2448 Note that at this point, we're pretending constructs like \< are real
2451 characters rather than constraints on what can follow them. 2449 characters rather than constraints on what can follow them.
3919 bool endline = false; 3917 bool endline = false;
3920 bool need_begline = false; 3918 bool need_begline = false;
3921 bool need_endline = false; 3919 bool need_endline = false;
3922 bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1; 3920 bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
3923 3921
3924 for (size_t ri = 1; ri < d->tindex - 1; ++ri) 3922 for (size_t ri = 1; ri + 1 < d->tindex; ri++)
3925 { 3923 {
3926 token t = d->tokens[ri]; 3924 token t = d->tokens[ri];
3927 switch (t) 3925 switch (t)
3928 { 3926 {
3929 case BEGLINE: 3927 case BEGLINE: