Mercurial > gnulib
comparison lib/dfa.c @ 39859:1f2a63e46815
dfa: tweak allocation performance
* lib/dfa.c (merge_nfa_state, dfaoptimize):
Prefer ptrdiff_t for indexes some more.
Use char for flags, as it’s wide enough.
Allocate queue and flags together, with one malloc call.
No need to use xnmalloc since the multiplication and
addition cannot overflow (it’s already been checked by
earlier allocation). Prefer memset to open-coding.
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Tue, 18 Sep 2018 19:05:26 -0700 |
parents | f1a9693c37be |
children | 42b9c905d1bb |
comparison
equal
deleted
inserted
replaced
39858:f1a9693c37be | 39859:1f2a63e46815 |
---|---|
2340 | 2340 |
2341 /* The node is queued. The node is not queued again. */ | 2341 /* The node is queued. The node is not queued again. */ |
2342 OPT_QUEUED = (1 << 4) | 2342 OPT_QUEUED = (1 << 4) |
2343 }; | 2343 }; |
2344 | 2344 |
2345 static size_t | 2345 static ptrdiff_t |
2346 merge_nfa_state (struct dfa *d, size_t *queue, size_t nqueue, size_t tindex, | 2346 merge_nfa_state (struct dfa *d, size_t *queue, ptrdiff_t nqueue, size_t tindex, |
2347 int *flags, position_set *merged) | 2347 char *flags, position_set *merged) |
2348 { | 2348 { |
2349 position_set *follows = d->follows; | 2349 position_set *follows = d->follows; |
2350 ptrdiff_t nelem = 0; | 2350 ptrdiff_t nelem = 0; |
2351 | 2351 |
2352 for (ptrdiff_t i = 0; i < follows[tindex].nelem; i++) | 2352 for (ptrdiff_t i = 0; i < follows[tindex].nelem; i++) |
2367 } | 2367 } |
2368 | 2368 |
2369 if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN)) | 2369 if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN)) |
2370 continue; | 2370 continue; |
2371 | 2371 |
2372 for (size_t j = i + 1; j < follows[tindex].nelem; j++) | 2372 for (ptrdiff_t j = i + 1; j < follows[tindex].nelem; j++) |
2373 { | 2373 { |
2374 size_t sindex = follows[tindex].elems[j].index; | 2374 size_t sindex = follows[tindex].elems[j].index; |
2375 | 2375 |
2376 if (follows[tindex].elems[j].constraint != iconstraint) | 2376 if (follows[tindex].elems[j].constraint != iconstraint) |
2377 continue; | 2377 continue; |
2402 } | 2402 } |
2403 | 2403 |
2404 static void | 2404 static void |
2405 dfaoptimize (struct dfa *d) | 2405 dfaoptimize (struct dfa *d) |
2406 { | 2406 { |
2407 int *flags; | 2407 char *flags; |
2408 size_t *queue; | 2408 size_t *queue; |
2409 size_t nqueue; | 2409 ptrdiff_t nqueue; |
2410 position_set merged0; | 2410 position_set merged0; |
2411 position_set *merged; | 2411 position_set *merged; |
2412 | 2412 ptrdiff_t extra_space = d->tindex + sizeof *queue - 1; |
2413 flags = xnmalloc (d->tindex, sizeof *flags); | 2413 extra_space -= extra_space % sizeof *queue; |
2414 queue = xnmalloc (d->nleaves, sizeof *queue); | 2414 |
2415 | 2415 queue = xmalloc (d->nleaves * sizeof *queue + extra_space); |
2416 for (size_t i = 0; i < d->tindex; ++i) | 2416 flags = (char *) (queue + d->nleaves); |
2417 flags[i] = 0; | 2417 memset (flags, 0, d->tindex * sizeof *flags); |
2418 | 2418 |
2419 for (size_t i = 0; i < d->tindex; ++i) | 2419 for (size_t i = 0; i < d->tindex; i++) |
2420 { | 2420 { |
2421 for (size_t j = 0; j < d->follows[i].nelem; j++) | 2421 for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++) |
2422 { | 2422 { |
2423 if (d->follows[i].elems[j].index == i) | 2423 if (d->follows[i].elems[j].index == i) |
2424 flags[d->follows[i].elems[j].index] |= OPT_REPEAT; | 2424 flags[d->follows[i].elems[j].index] |= OPT_REPEAT; |
2425 else if (d->follows[i].elems[j].index < i) | 2425 else if (d->follows[i].elems[j].index < i) |
2426 flags[d->follows[i].elems[j].index] |= OPT_LPAREN; | 2426 flags[d->follows[i].elems[j].index] |= OPT_LPAREN; |
2427 else if (flags[d->follows[i].elems[j].index] &= | 2427 else if (flags[d->follows[i].elems[j].index] &= OPT_WALKED) |
2428 OPT_WALKED) | |
2429 flags[d->follows[i].elems[j].index] |= OPT_RPAREN; | 2428 flags[d->follows[i].elems[j].index] |= OPT_RPAREN; |
2430 else | 2429 else |
2431 flags[d->follows[i].elems[j].index] |= OPT_WALKED; | 2430 flags[d->follows[i].elems[j].index] |= OPT_WALKED; |
2432 } | 2431 } |
2433 } | 2432 } |
2436 alloc_position_set (merged, d->nleaves); | 2435 alloc_position_set (merged, d->nleaves); |
2437 | 2436 |
2438 nqueue = 0; | 2437 nqueue = 0; |
2439 queue[nqueue++] = 0; | 2438 queue[nqueue++] = 0; |
2440 | 2439 |
2441 for (size_t i = 0; i < nqueue; i++) | 2440 for (ptrdiff_t i = 0; i < nqueue; i++) |
2442 nqueue = merge_nfa_state (d, queue, nqueue, queue[i], flags, merged); | 2441 nqueue = merge_nfa_state (d, queue, nqueue, queue[i], flags, merged); |
2443 | 2442 |
2444 free (merged->elems); | 2443 free (merged->elems); |
2445 free (queue); | 2444 free (queue); |
2446 free (flags); | |
2447 } | 2445 } |
2448 | 2446 |
2449 /* Perform bottom-up analysis on the parse tree, computing various functions. | 2447 /* Perform bottom-up analysis on the parse tree, computing various functions. |
2450 Note that at this point, we're pretending constructs like \< are real | 2448 Note that at this point, we're pretending constructs like \< are real |
2451 characters rather than constraints on what can follow them. | 2449 characters rather than constraints on what can follow them. |
3919 bool endline = false; | 3917 bool endline = false; |
3920 bool need_begline = false; | 3918 bool need_begline = false; |
3921 bool need_endline = false; | 3919 bool need_endline = false; |
3922 bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1; | 3920 bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1; |
3923 | 3921 |
3924 for (size_t ri = 1; ri < d->tindex - 1; ++ri) | 3922 for (size_t ri = 1; ri + 1 < d->tindex; ri++) |
3925 { | 3923 { |
3926 token t = d->tokens[ri]; | 3924 token t = d->tokens[ri]; |
3927 switch (t) | 3925 switch (t) |
3928 { | 3926 { |
3929 case BEGLINE: | 3927 case BEGLINE: |