changeset 30979:9b0911f833ac

New module 'uninorm/u8-normxfrm'.
author Bruno Haible <bruno@clisp.org>
date Sat, 07 Mar 2009 16:37:25 +0100
parents 70ae78ad1609
children 05db6891e5f0
files ChangeLog lib/uninorm.h lib/uninorm/u-normxfrm.h lib/uninorm/u8-normxfrm.c modules/uninorm/u8-normxfrm
diffstat 5 files changed, 175 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sat Mar 07 16:34:49 2009 +0100
+++ b/ChangeLog	Sat Mar 07 16:37:25 2009 +0100
@@ -1,3 +1,12 @@
+2009-03-07  Bruno Haible  <bruno@clisp.org>
+
+	New module 'uninorm/u8-normxfrm'.
+	* lib/uninorm.h (u8_normxfrm, u16_normxfrm, u32_normxfrm): New
+	declarations.
+	* lib/uninorm/u8-normxfrm.c: New file.
+	* lib/uninorm/u-normxfrm.h: New file.
+	* modules/uninorm/u8-normxfrm: New file.
+
 2009-03-07  Bruno Haible  <bruno@clisp.org>
 
 	* modules/uninorm/u8-normcmp (Depends-on): Add uninorm/base.
--- a/lib/uninorm.h	Sat Mar 07 16:34:49 2009 +0100
+++ b/lib/uninorm.h	Sat Mar 07 16:37:25 2009 +0100
@@ -164,6 +164,21 @@
 		    uninorm_t nf, int *result);
 
 
+/* Converts the string S of length N to a string in locale encoding, in such a
+   way that comparing uN_normxfrm (S1) and uN_normxfrm (S2) with memcmp2() is
+   equivalent to comparing S1 and S2 with uN_normcoll().  Upon failure, returns
+   NULL with errno set.  NF must be either UNINORM_NFC or UNINORM_NFKC.  */
+extern char *
+       u8_normxfrm (const uint8_t *s, size_t n, uninorm_t nf,
+		    char *resultbuf, size_t *lengthp);
+extern char *
+       u16_normxfrm (const uint16_t *s, size_t n, uninorm_t nf,
+		     char *resultbuf, size_t *lengthp);
+extern char *
+       u32_normxfrm (const uint32_t *s, size_t n, uninorm_t nf,
+		     char *resultbuf, size_t *lengthp);
+
+
 /* Normalization of a stream of Unicode characters.
 
    A "stream of Unicode characters" is essentially a function that accepts an
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/uninorm/u-normxfrm.h	Sat Mar 07 16:37:25 2009 +0100
@@ -0,0 +1,89 @@
+/* Locale dependent transformation for comparison of Unicode strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+char *
+FUNC (const UNIT *s, size_t n, uninorm_t nf,
+      char *resultbuf, size_t *lengthp)
+{
+  UNIT normsbuf[2048 / sizeof (UNIT)];  /* buffer offered to U_NORMALIZE */
+  UNIT *norms;                  /* normalized string; freed if != normsbuf */
+  size_t norms_length;          /* in: normsbuf capacity; out: norms length */
+  char convsbuf[2048];          /* buffer offered to U_CONV_TO_ENCODING */
+  char *convs;                  /* converted string; freed if != convsbuf */
+  size_t convs_length;          /* in: usable convsbuf size; out: convs length */
+  int ret;                      /* < 0 if the conversion failed */
+  char *result;                 /* value returned by memxfrm */
+
+  /* Step 1 of 3: normalize the Unicode string according to NF.  */
+  norms_length = sizeof (normsbuf) / sizeof (UNIT);
+  norms = U_NORMALIZE (nf, s, n, normsbuf, &norms_length);
+  if (norms == NULL)
+    /* errno is set here; nothing has been allocated yet.  */
+    return NULL;
+
+  /* Step 2 of 3: convert it to locale encoding.  */
+  convs = convsbuf;
+  convs_length = sizeof (convsbuf) - 1;  /* keep one byte of convsbuf spare */
+  ret = U_CONV_TO_ENCODING (locale_charset (),
+			    iconveh_error,
+			    norms, norms_length,
+			    NULL,
+			    &convs, &convs_length);
+  if (ret < 0)
+    {
+      if (norms != normsbuf)
+	{
+	  int saved_errno = errno;  /* free() may clobber errno */
+	  free (norms);
+	  errno = saved_errno;
+	}
+      return NULL;
+    }
+
+  if (norms != normsbuf)
+    free (norms);               /* not used any more after the conversion */
+
+  /* Ensure one more byte is available (convsbuf already has a spare one).  */
+  if (convs != convsbuf)
+    {
+      char *memory = (char *) realloc (convs, convs_length + 1);
+      if (memory == NULL)
+	{
+	  free (convs);         /* realloc failed: the old block is still ours */
+	  errno = ENOMEM;
+	  return NULL;
+	}
+      convs = memory;
+    }
+
+  /* Step 3 of 3: apply locale dependent transformations for comparison.  */
+  result = memxfrm (convs, convs_length, resultbuf, lengthp);
+  if (result == NULL)
+    {
+      if (convs != convsbuf)
+	{
+	  int saved_errno = errno;  /* free() may clobber errno */
+	  free (convs);
+	  errno = saved_errno;
+	}
+      return NULL;
+    }
+
+  if (convs != convsbuf)
+    free (convs);
+  return result;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/uninorm/u8-normxfrm.c	Sat Mar 07 16:37:25 2009 +0100
@@ -0,0 +1,34 @@
+/* Locale dependent transformation for comparison of UTF-8 strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "uninorm.h"
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "localcharset.h"
+#include "uniconv.h"
+#include "memxfrm.h"
+
+#define FUNC u8_normxfrm        /* name of the function the template defines */
+#define UNIT uint8_t            /* code unit type of the input string */
+#define U_NORMALIZE u8_normalize
+#define U_CONV_TO_ENCODING u8_conv_to_encoding
+#include "u-normxfrm.h"         /* template: expands to the body of FUNC */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modules/uninorm/u8-normxfrm	Sat Mar 07 16:37:25 2009 +0100
@@ -0,0 +1,28 @@
+Description:
+Locale dependent transformation for comparison of UTF-8 strings.
+
+Files:
+lib/uninorm/u8-normxfrm.c
+lib/uninorm/u-normxfrm.h
+
+Depends-on:
+uninorm/base
+uninorm/u8-normalize
+uniconv/u8-conv-to-enc
+localcharset
+memxfrm
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += uninorm/u8-normxfrm.c
+
+Include:
+"uninorm.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+