changeset 11350:4ac6a02b1941

New module 'unicase/u8-is-invariant'.
author Bruno Haible <bruno@clisp.org>
date Sun, 08 Mar 2009 16:49:20 +0100
parents 0c4597430a27
children 2a610607cf28
files ChangeLog lib/unicase/invariant.h lib/unicase/u-is-invariant.h lib/unicase/u8-is-invariant.c modules/unicase/u8-is-invariant
diffstat 5 files changed, 178 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sun Mar 08 16:47:16 2009 +0100
+++ b/ChangeLog	Sun Mar 08 16:49:20 2009 +0100
@@ -1,5 +1,11 @@
 2009-03-08  Bruno Haible  <bruno@clisp.org>
 
+	New module 'unicase/u8-is-invariant'.
+	* lib/unicase/u8-is-invariant.c: New file.
+	* lib/unicase/invariant.h: New file.
+	* lib/unicase/u-is-invariant.h: New file.
+	* modules/unicase/u8-is-invariant: New file.
+
 	Tests for module 'unicase/u32-casecoll'.
 	* modules/unicase/u32-casecoll-tests: New file.
 	* tests/unicase/test-u32-casecoll.c: New file.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/unicase/invariant.h	Sun Mar 08 16:49:20 2009 +0100
@@ -0,0 +1,45 @@
+/* Internal functions for Unicode character case mappings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "unitypes.h"
+#include "uninorm.h"
+
+/* Store in *RESULTP whether mapping NFD(S) via MAPPING is a no-op.  */
+extern int
+       u8_is_invariant (const uint8_t *s, size_t n,
+			uint8_t * (*mapping) (const uint8_t *s, size_t n, const char *iso639_language,
+					      uninorm_t nf,
+					      uint8_t *resultbuf, size_t *lengthp),
+			const char *iso639_language,
+			bool *resultp);
+extern int
+       u16_is_invariant (const uint16_t *s, size_t n,
+			 uint16_t * (*mapping) (const uint16_t *s, size_t n, const char *iso639_language,
+						uninorm_t nf,
+						uint16_t *resultbuf, size_t *lengthp),
+			 const char *iso639_language,
+			 bool *resultp);
+extern int
+       u32_is_invariant (const uint32_t *s, size_t n,
+			 uint32_t * (*mapping) (const uint32_t *s, size_t n, const char *iso639_language,
+						uninorm_t nf,
+						uint32_t *resultbuf, size_t *lengthp),
+			 const char *iso639_language,
+			 bool *resultp);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/unicase/u-is-invariant.h	Sun Mar 08 16:49:20 2009 +0100
@@ -0,0 +1,64 @@
+/* Test whether a Unicode string is invariant under a given case mapping.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+int
+FUNC (const UNIT *s, size_t n,
+      UNIT * (*mapping) (const UNIT *s, size_t n, const char *iso639_language,
+			 uninorm_t nf,
+			 UNIT *resultbuf, size_t *lengthp),
+      const char *iso639_language,
+      bool *resultp)
+{
+  UNIT normsbuf[2048 / sizeof (UNIT)];
+  UNIT *norms;
+  size_t norms_length;
+  UNIT mappedbuf[2048 / sizeof (UNIT)];
+  UNIT *mapped;
+  size_t mapped_length;
+
+  /* Apply canonical decomposition (NFD) to S.  */
+  norms_length = sizeof (normsbuf) / sizeof (UNIT);
+  norms = U_NORMALIZE (UNINORM_NFD, s, n, normsbuf, &norms_length);
+  if (norms == NULL)
+    /* errno is set here.  */
+    return -1;
+
+  /* Apply MAPPING to the decomposed string; NULL is passed for NF.  */
+  mapped_length = sizeof (mappedbuf) / sizeof (UNIT);
+  mapped = mapping (norms, norms_length, iso639_language, NULL,
+		    mappedbuf, &mapped_length);
+  if (mapped == NULL)
+    {
+      if (norms != normsbuf)
+	{
+	  int saved_errno = errno;	/* preserve errno across free */
+	  free (norms);
+	  errno = saved_errno;
+	}
+      return -1;
+    }
+
+  /* Invariant iff the mapped string equals the decomposed string.  */
+  *resultp = (mapped_length == norms_length
+	      && U_CMP (mapped, norms, norms_length) == 0);
+
+  if (mapped != mappedbuf)	/* free only results not held in the local arrays */
+    free (mapped);
+  if (norms != normsbuf)
+    free (norms);
+  return 0;	/* success: *RESULTP has been set */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/unicase/u8-is-invariant.c	Sun Mar 08 16:49:20 2009 +0100
@@ -0,0 +1,33 @@
+/* Test whether a UTF-8 string is invariant under a given case mapping.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unicase/invariant.h"
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "uninorm.h"
+#include "unistr.h"
+
+#define FUNC u8_is_invariant
+#define UNIT uint8_t
+#define U_NORMALIZE u8_normalize
+#define U_CMP u8_cmp
+#include "u-is-invariant.h"	/* Defines FUNC in terms of the macros above.  */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modules/unicase/u8-is-invariant	Sun Mar 08 16:49:20 2009 +0100
@@ -0,0 +1,30 @@
+Description:
+Test whether a UTF-8 string is invariant under a given case mapping.
+
+Files:
+lib/unicase/u8-is-invariant.c
+lib/unicase/invariant.h
+lib/unicase/u-is-invariant.h
+
+Depends-on:
+unitypes
+uninorm/base
+uninorm/u8-normalize
+uninorm/nfd
+unistr/u8-cmp
+stdbool
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += unicase/u8-is-invariant.c
+
+Include:
+"unicase/invariant.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+