diff lib/regcomp.c @ 6171:5862ee08bfc1

* lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char): (optimize_subexps, lower_subexp): Don't assume 1<<31 has defined behavior on hosts with 32-bit int, since the signed shift might overflow. Use 1u<<31 instead. * lib/regex_internal.h (bitset_set, bitset_clear, bitset_contain): Likewise. * lib/regexec.c (check_dst_limits_calc_pos_1): (check_subexp_matching_top): Likewise. * lib/regcomp.c (optimize_subexps, lower_subexp): Use CHAR_BIT rather than 8, for clarity. * lib/regexec.c (check_dst_limits_calc_pos_1): (check_subexp_matching_top): Likewise. * lib/regcomp.c (init_dfa): Make table_size unsigned, so that we don't have to worry about portability issues when shifting it left. Remove no-longer-needed test for table_size > 0. * lib/regcomp.c (parse_sub_exp): Do not shift more bits than there are in a word, as the resulting behavior is undefined. * lib/regexec.c (check_dst_limits_calc_pos_1): Likewise; in one case, a <= should have been an <, and in another case the whole test was missing. * lib/regex_internal.h (BYTE_BITS): Remove. All uses changed to the standard name CHAR_BIT. * lib/regexec.c (match_ctx_add_entry): Don't assume that ~0 == -1; this is not true on one's complement and signed-magnitude hosts.
author Paul Eggert <eggert@cs.ucla.edu>
date Wed, 31 Aug 2005 19:38:13 +0000
parents a10e4460ad4c
children 6039b763ad3c
line wrap: on
line diff
--- a/lib/regcomp.c	Wed Aug 31 18:08:34 2005 +0000
+++ b/lib/regcomp.c	Wed Aug 31 19:38:13 2005 +0000
@@ -336,7 +336,7 @@
 	  int i, j, ch;
 	  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
 	    for (j = 0; j < UINT_BITS; ++j, ++ch)
-	      if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
+	      if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
 		re_set_fastmap (fastmap, icase, ch);
 	}
 #ifdef RE_ENABLE_I18N
@@ -798,7 +798,7 @@
 static reg_errcode_t
 init_dfa (re_dfa_t *dfa, int pat_len)
 {
-  int table_size;
+  unsigned int table_size;
 #ifndef _LIBC
   char *codeset_name;
 #endif
@@ -812,7 +812,7 @@
   dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
 
   /*  table_size = 2 ^ ceil(log pat_len) */
-  for (table_size = 1; table_size > 0; table_size <<= 1)
+  for (table_size = 1; ; table_size <<= 1)
     if (table_size > pat_len)
       break;
 
@@ -872,7 +872,7 @@
 	      {
 		wint_t wch = __btowc (ch);
 		if (wch != WEOF)
-		  dfa->sb_char[i] |= 1 << j;
+		  dfa->sb_char[i] |= 1u << j;
 # ifndef _LIBC
 		if (isascii (ch) && wch != ch)
 		  dfa->map_notascii = 1;
@@ -899,7 +899,7 @@
   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
     for (j = 0; j < UINT_BITS; ++j, ++ch)
       if (isalnum (ch) || ch == '_')
-	dfa->word_char[i] |= 1 << j;
+	dfa->word_char[i] |= 1u << j;
 }
 
 /* Free the work area which are only used while compiling.  */
@@ -1222,8 +1222,8 @@
         node->left->parent = node;
 
       dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
-      if (other_idx < 8 * sizeof (dfa->used_bkref_map))
-	dfa->used_bkref_map &= ~(1 << other_idx);
+      if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
+	dfa->used_bkref_map &= ~(1u << other_idx);
     }
 
   return REG_NOERROR;
@@ -1266,8 +1266,8 @@
 	 very common, so we do not lose much.  An example that triggers
 	 this case is the sed "script" /\(\)/x.  */
       && node->left != NULL
-      && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
-	  || !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
+      && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
+	  || !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
     return node->left;
 
   /* Convert the SUBEXP node to the concatenation of an
@@ -2380,7 +2380,9 @@
       if (BE (*err != REG_NOERROR, 0))
 	return NULL;
     }
-  dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  if (cur_nsub <= '9' - '1')
+    dfa->completed_bkref_map |= 1 << cur_nsub;
 
   tree = create_tree (dfa, tree, NULL, SUBEXP);
   if (BE (tree == NULL, 0))