changeset 8098:cc3f4a2db5b0

New module 'mbscasestr'. Reduced goal of 'strcasestr'.
author Bruno Haible <bruno@clisp.org>
date Mon, 05 Feb 2007 02:42:27 +0000
parents de408ca45a04
children 7b1a5da611db
files ChangeLog MODULES.html.sh lib/mbscasestr.c lib/strcasestr.c lib/string_.h m4/strcasestr.m4 m4/string_h.m4 modules/string
diffstat 8 files changed, 81 insertions(+), 122 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Mon Feb 05 02:27:35 2007 +0000
+++ b/ChangeLog	Mon Feb 05 02:42:27 2007 +0000
@@ -1,3 +1,25 @@
+2007-02-04  Bruno Haible  <bruno@clisp.org>
+
+	New module mbscasestr, reduced goal of strcasestr.
+	* modules/mbscasestr: New file.
+	* lib/mbscasestr.c: New file, copied from lib/strcasestr.c.
+	(mbscasestr): Renamed from strcasestr.
+	* lib/strcasestr.c: Don't include mbuiter.h.
+	(strcasestr): Remove support for multibyte locales.
+	* lib/string_.h (strcasestr): Don't rename. Declare only if missing.
+	Change the conditional link warning.
+	(mbscasestr): New declaration.
+	* m4/mbscasestr.m4: New file.
+	* m4/strcasestr.m4 (gl_FUNC_STRCASESTR): Enable the replacement only if
+	the system does not have strcasestr. Set HAVE_STRCASESTR instead of
+	REPLACE_STRCASESTR.
+	* m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Initialize
+	HAVE_STRCASESTR instead of REPLACE_STRCASESTR.
+	(gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSCASESTR.
+	* modules/string (string.h): Also substitute GNULIB_MBSCASESTR.
+	Substitute HAVE_STRCASESTR instead of REPLACE_STRCASESTR.
+	* MODULES.html.sh (Internationalization functions): Add mbscasestr.
+
 2007-02-04  Bruno Haible  <bruno@clisp.org>
 
 	Simplify handling of strncasecmp.
--- a/MODULES.html.sh	Mon Feb 05 02:27:35 2007 +0000
+++ b/MODULES.html.sh	Mon Feb 05 02:42:27 2007 +0000
@@ -2164,6 +2164,7 @@
   func_module mbsrchr
   func_module mbsstr
   func_module mbscasecmp
+  func_module mbscasestr
   func_module mbswidth
   func_module memcasecmp
   func_module memcoll
--- a/lib/mbscasestr.c	Mon Feb 05 02:27:35 2007 +0000
+++ b/lib/mbscasestr.c	Mon Feb 05 02:42:27 2007 +0000
@@ -30,12 +30,12 @@
 
 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
 
-/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
-   comparison.
+/* Find the first occurrence of the character string NEEDLE in the character
+   string HAYSTACK, using case-insensitive comparison.
    Note: This function may, in multibyte locales, return success even if
    strlen (haystack) < strlen (needle) !  */
 char *
-strcasestr (const char *haystack, const char *needle)
+mbscasestr (const char *haystack, const char *needle)
 {
   /* Be careful not to look at the entire extent of haystack or needle
      until needed.  This is useful because of these two cases:
--- a/lib/strcasestr.c	Mon Feb 05 02:27:35 2007 +0000
+++ b/lib/strcasestr.c	Mon Feb 05 02:42:27 2007 +0000
@@ -24,10 +24,6 @@
 #include <ctype.h>
 #include <stddef.h>  /* for NULL, in case a nonstandard string.h lacks it */
 
-#if HAVE_MBRTOWC
-# include "mbuiter.h"
-#endif
-
 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
 
 /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
@@ -37,109 +33,40 @@
 char *
 strcasestr (const char *haystack, const char *needle)
 {
-  /* Be careful not to look at the entire extent of haystack or needle
-     until needed.  This is useful because of these two cases:
-       - haystack may be very long, and a match of needle found early,
-       - needle may be very long, and not even a short initial segment of
-         needle may be found in haystack.  */
-#if HAVE_MBRTOWC
-  if (MB_CUR_MAX > 1)
+  if (*needle != '\0')
     {
-      mbui_iterator_t iter_needle;
-
-      mbui_init (iter_needle, needle);
-      if (mbui_avail (iter_needle))
-	{
-	  mbchar_t b;
-	  mbui_iterator_t iter_haystack;
-
-	  mb_copy (&b, &mbui_cur (iter_needle));
-	  if (b.wc_valid)
-	    b.wc = towlower (b.wc);
-
-	  mbui_init (iter_haystack, haystack);
-	  for (;; mbui_advance (iter_haystack))
-	    {
-	      mbchar_t c;
-
-	      if (!mbui_avail (iter_haystack))
-		/* No match.  */
-		return NULL;
+      /* Speed up the following searches of needle by caching its first
+	 character.  */
+      unsigned char b = TOLOWER ((unsigned char) *needle);
 
-	      mb_copy (&c, &mbui_cur (iter_haystack));
-	      if (c.wc_valid)
-		c.wc = towlower (c.wc);
-	      if (mb_equal (c, b))
-		/* The first character matches.  */
-		{
-		  mbui_iterator_t rhaystack;
-		  mbui_iterator_t rneedle;
-
-		  memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
-		  mbui_advance (rhaystack);
+      needle++;
+      for (;; haystack++)
+	{
+	  if (*haystack == '\0')
+	    /* No match.  */
+	    return NULL;
+	  if (TOLOWER ((unsigned char) *haystack) == b)
+	    /* The first character matches.  */
+	    {
+	      const char *rhaystack = haystack + 1;
+	      const char *rneedle = needle;
 
-		  mbui_init (rneedle, needle);
-		  if (!mbui_avail (rneedle))
-		    abort ();
-		  mbui_advance (rneedle);
-
-		  for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
-		    {
-		      if (!mbui_avail (rneedle))
-			/* Found a match.  */
-			return (char *) mbui_cur_ptr (iter_haystack);
-		      if (!mbui_avail (rhaystack))
-			/* No match.  */
-			return NULL;
-		      if (!mb_caseequal (mbui_cur (rhaystack),
-					 mbui_cur (rneedle)))
-			/* Nothing in this round.  */
-			break;
-		    }
+	      for (;; rhaystack++, rneedle++)
+		{
+		  if (*rneedle == '\0')
+		    /* Found a match.  */
+		    return (char *) haystack;
+		  if (*rhaystack == '\0')
+		    /* No match.  */
+		    return NULL;
+		  if (TOLOWER ((unsigned char) *rhaystack)
+		      != TOLOWER ((unsigned char) *rneedle))
+		    /* Nothing in this round.  */
+		    break;
 		}
 	    }
 	}
-      else
-	return (char *) haystack;
     }
   else
-#endif
-    {
-      if (*needle != '\0')
-	{
-	  /* Speed up the following searches of needle by caching its first
-	     character.  */
-	  unsigned char b = TOLOWER ((unsigned char) *needle);
-
-	  needle++;
-	  for (;; haystack++)
-	    {
-	      if (*haystack == '\0')
-		/* No match.  */
-		return NULL;
-	      if (TOLOWER ((unsigned char) *haystack) == b)
-		/* The first character matches.  */
-		{
-		  const char *rhaystack = haystack + 1;
-		  const char *rneedle = needle;
-
-		  for (;; rhaystack++, rneedle++)
-		    {
-		      if (*rneedle == '\0')
-			/* Found a match.  */
-			return (char *) haystack;
-		      if (*rhaystack == '\0')
-			/* No match.  */
-			return NULL;
-		      if (TOLOWER ((unsigned char) *rhaystack)
-			  != TOLOWER ((unsigned char) *rneedle))
-			/* Nothing in this round.  */
-			break;
-		    }
-		}
-	    }
-	}
-      else
-	return (char *) haystack;
-    }
+    return (char *) haystack;
 }
--- a/lib/string_.h	Mon Feb 05 02:27:35 2007 +0000
+++ b/lib/string_.h	Mon Feb 05 02:42:27 2007 +0000
@@ -257,19 +257,17 @@
 #endif
 
 /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
-   comparison.
-   Note: This function may, in multibyte locales, return success even if
-   strlen (haystack) < strlen (needle) !  */
-#if @GNULIB_STRCASESTR@
-# if @REPLACE_STRCASESTR@
-#  undef strcasestr
-#  define strcasestr rpl_strcasestr
+   comparison.  */
+#if ! @HAVE_STRCASESTR@
 extern char *strcasestr (const char *haystack, const char *needle);
-# endif
-#elif defined GNULIB_POSIXCHECK
+#endif
+#if defined GNULIB_POSIXCHECK
+/* strcasestr() does not work with multibyte strings:
+   It is a glibc extension, and glibc implements it only for unibyte
+   locales.  */
 # undef strcasestr
 # define strcasestr(a,b) \
-    (GL_LINK_WARNING ("strcasestr is often incorrectly implemented for multibyte locales - use gnulib module 'strcasestr' for correct and portable internationalization"), \
+    (GL_LINK_WARNING ("strcasestr does not work correctly on character strings in multibyte locales - use mbscasestr if you care about internationalization, or use c-strcasestr if you want a locale independent function"), \
      strcasestr (a, b))
 #endif
 
@@ -345,6 +343,15 @@
 extern int mbscasecmp (const char *s1, const char *s2);
 #endif
 
+#if @GNULIB_MBSCASESTR@
+/* Find the first occurrence of the character string NEEDLE in the character
+   string HAYSTACK, using case-insensitive comparison.
+   Note: This function may, in multibyte locales, return success even if
+   strlen (haystack) < strlen (needle) !
+   Unlike strcasestr(), this function works correctly in multibyte locales.  */
+extern char * mbscasestr (const char *haystack, const char *needle);
+#endif
+
 
 #ifdef __cplusplus
 }
--- a/m4/strcasestr.m4	Mon Feb 05 02:27:35 2007 +0000
+++ b/m4/strcasestr.m4	Mon Feb 05 02:42:27 2007 +0000
@@ -1,4 +1,4 @@
-# strcasestr.m4 serial 4
+# strcasestr.m4 serial 5
 dnl Copyright (C) 2005, 2007 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -7,11 +7,11 @@
 AC_DEFUN([gl_FUNC_STRCASESTR],
 [
   AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS])
-  dnl No known system has a strcasestr() function that works correctly in
-  dnl multibyte locales. Therefore we use our version always.
-  AC_LIBOBJ(strcasestr)
-  REPLACE_STRCASESTR=1
-  gl_PREREQ_STRCASESTR
+  AC_REPLACE_FUNCS(strcasestr)
+  if test $ac_cv_func_strcasestr = no; then
+    HAVE_STRCASESTR=0
+    gl_PREREQ_STRCASESTR
+  fi
 ])
 
 # Prerequisites of lib/strcasestr.c.
--- a/m4/string_h.m4	Mon Feb 05 02:27:35 2007 +0000
+++ b/m4/string_h.m4	Mon Feb 05 02:42:27 2007 +0000
@@ -41,8 +41,8 @@
   HAVE_DECL_STRNLEN=1;		AC_SUBST([HAVE_DECL_STRNLEN])
   HAVE_STRPBRK=1;		AC_SUBST([HAVE_STRPBRK])
   HAVE_STRSEP=1;		AC_SUBST([HAVE_STRSEP])
+  HAVE_STRCASESTR=1;		AC_SUBST([HAVE_STRCASESTR])
   HAVE_DECL_STRTOK_R=1;		AC_SUBST([HAVE_DECL_STRTOK_R])
-  REPLACE_STRCASESTR=0;		AC_SUBST([REPLACE_STRCASESTR])
 ])
 
 AC_DEFUN([gl_STRING_MODULE_INDICATOR],
@@ -71,4 +71,5 @@
   GNULIB_MBSRCHR=0;     AC_SUBST([GNULIB_MBSRCHR])
   GNULIB_MBSSTR=0;      AC_SUBST([GNULIB_MBSSTR])
   GNULIB_MBSCASECMP=0;  AC_SUBST([GNULIB_MBSCASECMP])
+  GNULIB_MBSCASESTR=0;  AC_SUBST([GNULIB_MBSCASESTR])
 ])
--- a/modules/string	Mon Feb 05 02:27:35 2007 +0000
+++ b/modules/string	Mon Feb 05 02:42:27 2007 +0000
@@ -25,6 +25,7 @@
 	      -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \
 	      -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
 	      -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
+	      -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \
 	      -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
 	      -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
 	      -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \
@@ -52,8 +53,8 @@
 	      -e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \
 	      -e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \
 	      -e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \
+	      -e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \
 	      -e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \
-	      -e 's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \
 	      < $(srcdir)/string_.h; \
 	} > $@-t
 	mv $@-t $@