changeset 27606:5807e6a10907

New function mem_iconveh.
author Bruno Haible <bruno@clisp.org>
date Sun, 21 Jan 2007 22:04:33 +0000
parents ed3a53ea0945
children d2f280a20cc3
files lib/striconveh.c lib/striconveh.h tests/test-striconveh.c
diffstat 3 files changed, 314 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/lib/striconveh.c	Sun Jan 21 21:34:27 2007 +0000
+++ b/lib/striconveh.c	Sun Jan 21 22:04:33 2007 +0000
@@ -766,6 +766,154 @@
 
 #endif
 
+/* Convert the SRCLEN bytes at SRC from FROM_CODESET to TO_CODESET, passing
+   HANDLER on to mem_cd_iconveh.  On entry, *RESULTP and *LENGTHP are a
+   scratch buffer and its size, or *RESULTP is NULL.  On success, return 0
+   with the result (the scratch buffer or a malloc()ed block) in *RESULTP
+   and its length in *LENGTHP.  Otherwise return -1 with errno set.  */
+int
+mem_iconveh (const char *src, size_t srclen,
+	     const char *from_codeset, const char *to_codeset,
+	     enum iconv_ilseq_handler handler,
+	     char **resultp, size_t *lengthp)
+{
+  if (c_strcasecmp (from_codeset, to_codeset) == 0)
+    {
+      char *result;
+
+      if (*resultp != NULL && *lengthp >= srclen)
+	result = *resultp;
+      else
+	{
+	  /* malloc (0) may return NULL without being out of memory; ask for
+	     at least one byte, so empty input does not fail with ENOMEM.  */
+	  result = (char *) malloc (srclen > 0 ? srclen : 1);
+	  if (result == NULL)
+	    {
+	      errno = ENOMEM;
+	      return -1;
+	    }
+	}
+      memcpy (result, src, srclen);
+      *resultp = result;
+      *lengthp = srclen;
+      return 0;
+    }
+  else
+    {
+#if HAVE_ICONV
+      iconv_t cd;
+      iconv_t cd1;
+      iconv_t cd2;
+      char *result;
+      size_t length;
+      int retval;
+
+      /* Avoid glibc-2.1 bug with EUC-KR.  */
+# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
+      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
+	  || c_strcasecmp (to_codeset, "EUC-KR") == 0)
+	{
+	  errno = EINVAL;
+	  return -1;
+	}
+# endif
+
+      cd = iconv_open (to_codeset, from_codeset);
+      if (cd == (iconv_t)(-1))
+	return -1;
+
+      if (c_strcasecmp (from_codeset, "UTF-8") == 0)
+	cd1 = (iconv_t)(-1);
+      else
+	{
+	  cd1 = iconv_open ("UTF-8", from_codeset);
+	  if (cd1 == (iconv_t)(-1))
+	    {
+	      int saved_errno = errno;
+	      iconv_close (cd);
+	      errno = saved_errno;
+	      return -1;
+	    }
+	}
+
+      if (c_strcasecmp (to_codeset, "UTF-8") == 0)
+	cd2 = (iconv_t)(-1);
+      else
+	{
+	  cd2 = iconv_open (to_codeset, "UTF-8");
+	  if (cd2 == (iconv_t)(-1))
+	    {
+	      int saved_errno = errno;
+	      if (cd1 != (iconv_t)(-1))
+		iconv_close (cd1);
+	      iconv_close (cd);
+	      errno = saved_errno;
+	      return -1;
+	    }
+	}
+
+      result = *resultp;
+      length = *lengthp;
+      retval =
+	mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, &result, &length);
+
+      if (retval < 0)
+	{
+	  /* Close cd, cd1, cd2, but preserve the errno from mem_cd_iconveh.  */
+	  int saved_errno = errno;
+	  if (cd2 != (iconv_t)(-1))
+	    iconv_close (cd2);
+	  if (cd1 != (iconv_t)(-1))
+	    iconv_close (cd1);
+	  iconv_close (cd);
+	  errno = saved_errno;
+	}
+      else
+	{
+	  int close_failed = 0;
+	  int close_errno = 0;
+
+	  /* Close cd2, cd1, cd.  All get closed even if one iconv_close fails;
+	     the first failure sets errno, and fresh output is freed.  */
+	  if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
+	    {
+	      close_failed = 1;
+	      close_errno = errno;
+	    }
+	  if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0 && !close_failed)
+	    {
+	      close_failed = 1;
+	      close_errno = errno;
+	    }
+	  if (iconv_close (cd) < 0 && !close_failed)
+	    {
+	      close_failed = 1;
+	      close_errno = errno;
+	    }
+	  if (close_failed)
+	    {
+	      if (result != *resultp)
+		free (result);
+	      errno = close_errno;
+	      return -1;
+	    }
+	  *resultp = result;
+	  *lengthp = length;
+	}
+      return retval;
+#else
+      /* This is a different error code than if iconv_open existed but didn't
+	 support from_codeset and to_codeset, so that the caller can emit
+	 an error message such as
+	   "iconv() is not supported. Installing GNU libiconv and
+	    then reinstalling this package would fix this."  */
+      errno = ENOSYS;
+      return -1;
+#endif
+    }
+}
+
 char *
 str_iconveh (const char *src,
 	     const char *from_codeset, const char *to_codeset,
--- a/lib/striconveh.h	Sun Jan 21 21:34:27 2007 +0000
+++ b/lib/striconveh.h	Sun Jan 21 22:04:33 2007 +0000
@@ -80,6 +80,23 @@
 #endif
 
 /* Convert an entire string from one encoding to another, using iconv.
+   The original string is at [SRC,...,SRC+SRCLEN-1].
+   Both the "from" and the "to" encoding must use a single NUL byte at the
+   end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
+   *RESULTP and *LENGTHP should initially be a scratch buffer and its size,
+   or *RESULTP can initially be NULL.
+   May erase the contents of the memory at *RESULTP.
+   Return value: 0 if successful, otherwise -1 and errno set.
+   If successful: The resulting string is stored in *RESULTP and its length
+   in *LENGTHP.  *RESULTP is set to a freshly allocated memory block, or is
+   unchanged if no dynamic memory allocation was necessary.  */
+extern int
+       mem_iconveh (const char *src, size_t srclen,
+		    const char *from_codeset, const char *to_codeset,
+		    enum iconv_ilseq_handler handler,
+		    char **resultp, size_t *lengthp);
+
+/* Convert an entire string from one encoding to another, using iconv.
    The original string is the NUL-terminated string starting at SRC.
    Both the "from" and the "to" encoding must use a single NUL byte at the
    end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
--- a/tests/test-striconveh.c	Sun Jan 21 21:34:27 2007 +0000
+++ b/tests/test-striconveh.c	Sun Jan 21 22:04:33 2007 +0000
@@ -347,6 +347,155 @@
   iconv_close (cd_88592_to_utf8);
   iconv_close (cd_utf8_to_88592);
 
+  /* ------------------------- Test mem_iconveh() ------------------------- */
+
+  /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.  */
+  for (h = 0; h < SIZEOF (handlers); h++)
+    {
+      enum iconv_ilseq_handler handler = handlers[h];
+      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+      static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+      char *result = NULL;
+      size_t length = 0;
+      int retval = mem_iconveh (input, strlen (input),
+				"ISO-8859-2", "ISO-8859-1",
+				handler,
+				&result, &length);
+      ASSERT (retval == 0);
+      ASSERT (length == strlen (expected));
+      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+      free (result);
+    }
+
+  /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.  */
+  for (h = 0; h < SIZEOF (handlers); h++)
+    {
+      enum iconv_ilseq_handler handler = handlers[h];
+      static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
+      char *result = NULL;
+      size_t length = 0;
+      int retval = mem_iconveh (input, strlen (input),
+				"ISO-8859-2", "ISO-8859-1",
+				handler,
+				&result, &length);
+      switch (handler)
+	{
+	case iconveh_error:
+	  ASSERT (retval == -1 && errno == EILSEQ);
+	  ASSERT (result == NULL);
+	  break;
+	case iconveh_question_mark:
+	  {
+	    static const char expected[] = "Rafa? Maszkowski";
+	    ASSERT (retval == 0);
+	    ASSERT (length == strlen (expected));
+	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	    free (result);
+	  }
+	  break;
+	case iconveh_escape_sequence:
+	  {
+	    static const char expected[] = "Rafa\\u0142 Maszkowski";
+	    ASSERT (retval == 0);
+	    ASSERT (length == strlen (expected));
+	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	    free (result);
+	  }
+	  break;
+	}
+    }
+
+  /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
+  for (h = 0; h < SIZEOF (handlers); h++)
+    {
+      enum iconv_ilseq_handler handler = handlers[h];
+      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+      static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
+      char *result = NULL;
+      size_t length = 0;
+      int retval = mem_iconveh (input, strlen (input),
+				"ISO-8859-1", "UTF-8",
+				handler,
+				&result, &length);
+      ASSERT (retval == 0);
+      ASSERT (length == strlen (expected));
+      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+      free (result);
+    }
+
+  /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
+  for (h = 0; h < SIZEOF (handlers); h++)
+    {
+      enum iconv_ilseq_handler handler = handlers[h];
+      static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
+      static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+      char *result = NULL;
+      size_t length = 0;
+      int retval = mem_iconveh (input, strlen (input),
+				"UTF-8", "ISO-8859-1",
+				handler,
+				&result, &length);
+      ASSERT (retval == 0);
+      ASSERT (length == strlen (expected));
+      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+      free (result);
+    }
+
+  /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
+  for (h = 0; h < SIZEOF (handlers); h++)
+    {
+      enum iconv_ilseq_handler handler = handlers[h];
+      static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
+      char *result = NULL;
+      size_t length = 0;
+      int retval = mem_iconveh (input, strlen (input),
+				"UTF-8", "ISO-8859-1",
+				handler,
+				&result, &length);
+      switch (handler)
+	{
+	case iconveh_error:
+	  ASSERT (retval == -1 && errno == EILSEQ);
+	  ASSERT (result == NULL);
+	  break;
+	case iconveh_question_mark:
+	  {
+	    static const char expected[] = "Rafa? Maszkowski";
+	    ASSERT (retval == 0);
+	    ASSERT (length == strlen (expected));
+	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	    free (result);
+	  }
+	  break;
+	case iconveh_escape_sequence:
+	  {
+	    static const char expected[] = "Rafa\\u0142 Maszkowski";
+	    ASSERT (retval == 0);
+	    ASSERT (length == strlen (expected));
+	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	    free (result);
+	  }
+	  break;
+	}
+    }
+
+  /* Test UTF-8 to ISO-8859-1 conversion of incomplete input (EINVAL).  */
+  for (h = 0; h < SIZEOF (handlers); h++)
+    {
+      enum iconv_ilseq_handler handler = handlers[h];
+      static const char input[] = "\342";
+      char *result = NULL;
+      size_t length = 0;
+      int retval = mem_iconveh (input, strlen (input),
+				"UTF-8", "ISO-8859-1",
+				handler,
+				&result, &length);
+      ASSERT (retval == 0);
+      ASSERT (length == 0);
+      if (result != NULL)
+	free (result);
+    }
+
   /* ------------------------- Test str_iconveh() ------------------------- */
 
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.  */