changeset 11179:1343849defec

New module 'uninorm/decomposition'.
author Bruno Haible <bruno@clisp.org>
date Sat, 21 Feb 2009 12:19:16 +0100
parents 8af28582a966
children 756c9e858420
files ChangeLog lib/uninorm/decomposition.c modules/uninorm/decomposition
diffstat 3 files changed, 109 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sat Feb 21 12:13:28 2009 +0100
+++ b/ChangeLog	Sat Feb 21 12:19:16 2009 +0100
@@ -1,5 +1,9 @@
 2009-02-21  Bruno Haible  <bruno@clisp.org>
 
+	New module 'uninorm/decomposition'.
+	* lib/uninorm/decomposition.c: New file.
+	* modules/uninorm/decomposition: New file.
+
 	New module 'uninorm/decomposition-table'.
 	* lib/uninorm/decomposition-table.h: New file.
 	* lib/uninorm/decomposition-table.c: New file.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/uninorm/decomposition.c	Sat Feb 21 12:19:16 2009 +0100
@@ -0,0 +1,81 @@
+/* Decomposition of Unicode characters.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "uninorm.h"
+
+#include "decomposition-table.h"
+/* Store UC's decomposition and its tag; return its length, or -1 if none.  */
+int
+uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
+{
+  if (uc >= 0xAC00 && uc < 0xD7A4)
+    {
+      /* Hangul syllable, decomposed arithmetically.  See Unicode standard,
+	 chapter 3, section "Hangul Syllable Decomposition".  */
+      unsigned int t, v, l;
+      /* After the subtraction below, uc == (l * 21 + v) * 28 + t.  */
+      uc -= 0xAC00;
+      t = uc % 28;
+      uc = uc / 28;
+      v = uc % 21;
+      l = uc / 21;
+      /* No bound is checked: DECOMPOSITION needs room for 3 elements.  */
+      *decomp_tag = UC_DECOMP_CANONICAL;
+      decomposition[0] = 0x1100 + l;
+      decomposition[1] = 0x1161 + v;
+      if (t == 0)  /* t == 0: only two elements are produced */
+	return 2;
+      else
+	{
+	  decomposition[2] = 0x11A7 + t;
+	  return 3;
+	}
+    }
+  else if (uc < 0x110000)  /* every other code point: table lookup */
+    {
+      unsigned short entry = decomp_index (uc);
+      if (entry != (unsigned short)(-1))  /* all ones: fall through to -1 */
+	{
+	  const unsigned char *p;
+	  unsigned int element;
+	  unsigned int length;
+	  /* Each table element is 3 bytes, most significant byte first.  */
+	  p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];  /* bit 15 of entry is masked off */
+	  element = (p[0] << 16) | (p[1] << 8) | p[2];
+	  /* The first element has 5 bits for the decomposition type.  */
+	  *decomp_tag = (element >> 18) & 0x1f;
+	  length = 1;
+	  for (;;)
+	    {
+	      /* Every element has an 18 bits wide Unicode code point.  */
+	      *decomposition = element & 0x3ffff;
+	      /* Bit 23 tells whether there are more elements.  */
+	      if ((element & (1 << 23)) == 0)
+		break;
+	      p += 3;
+	      element = (p[0] << 16) | (p[1] << 8) | p[2];
+	      decomposition++;  /* no bound is checked on the output buffer */
+	      length++;
+	    }
+	  return length;
+	}
+    }
+  return -1;  /* *decomp_tag and decomposition[] are left untouched */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modules/uninorm/decomposition	Sat Feb 21 12:19:16 2009 +0100
@@ -0,0 +1,24 @@
+Description:
+Decomposition of Unicode characters.
+
+Files:
+lib/uninorm/decomposition.c
+
+Depends-on:
+uninorm/base
+uninorm/decomposition-table
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += uninorm/decomposition.c
+
+Include:
+"uninorm.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+