# HG changeset patch
# User Bruno Haible <bruno@clisp.org>
# Date 1236440245 -3600
# Node ID 9b0911f833ac2af39b66c406cca277d17140f02c
# Parent  70ae78ad1609ff3f7791ac4be9925fd44d02b202
New module 'uninorm/u8-normxfrm'.

diff -r 70ae78ad1609 -r 9b0911f833ac ChangeLog
--- a/ChangeLog	Sat Mar 07 16:34:49 2009 +0100
+++ b/ChangeLog	Sat Mar 07 16:37:25 2009 +0100
@@ -1,3 +1,12 @@
+2009-03-07  Bruno Haible  <bruno@clisp.org>
+
+	New module 'uninorm/u8-normxfrm'.
+	* lib/uninorm.h (u8_normxfrm, u16_normxfrm, u32_normxfrm): New
+	declarations.
+	* lib/uninorm/u8-normxfrm.c: New file.
+	* lib/uninorm/u-normxfrm.h: New file.
+	* modules/uninorm/u8-normxfrm: New file.
+
 2009-03-07  Bruno Haible  <bruno@clisp.org>
 
 	* modules/uninorm/u8-normcmp (Depends-on): Add uninorm/base.
diff -r 70ae78ad1609 -r 9b0911f833ac lib/uninorm.h
--- a/lib/uninorm.h	Sat Mar 07 16:34:49 2009 +0100
+++ b/lib/uninorm.h	Sat Mar 07 16:37:25 2009 +0100
@@ -164,6 +164,21 @@
 		     uninorm_t nf, int *result);
 
 
+/* Converts the string S of length N to a string in locale encoding, in such a
+   way that comparing uN_normxfrm (S1) and uN_normxfrm (S2) with memcmp2() is
+   equivalent to comparing S1 and S2 with uN_normcoll().
+   NF must be either UNINORM_NFC or UNINORM_NFKC.  */
+extern char *
+       u8_normxfrm (const uint8_t *s, size_t n, uninorm_t nf,
+                    char *resultbuf, size_t *lengthp);
+extern char *
+       u16_normxfrm (const uint16_t *s, size_t n, uninorm_t nf,
+                     char *resultbuf, size_t *lengthp);
+extern char *
+       u32_normxfrm (const uint32_t *s, size_t n, uninorm_t nf,
+                     char *resultbuf, size_t *lengthp);
+
+
 /* Normalization of a stream of Unicode characters.
 
    A "stream of Unicode characters" is essentially a function that accepts an
diff -r 70ae78ad1609 -r 9b0911f833ac lib/uninorm/u-normxfrm.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/uninorm/u-normxfrm.h	Sat Mar 07 16:37:25 2009 +0100
@@ -0,0 +1,94 @@
+/* Locale dependent transformation for comparison of Unicode strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Template for the uN_normxfrm functions.  The including file defines FUNC,
+   UNIT, U_NORMALIZE and U_CONV_TO_ENCODING.
+   Returns the value produced by memxfrm for the normalized, locale-encoded
+   string, or NULL (with errno set) if any step fails.  */
+char *
+FUNC (const UNIT *s, size_t n, uninorm_t nf,
+      char *resultbuf, size_t *lengthp)
+{
+  UNIT normsbuf[2048 / sizeof (UNIT)];
+  UNIT *norms;
+  size_t norms_length;
+  char convsbuf[2048];
+  char *convs;
+  size_t convs_length;
+  int ret;
+  char *result;
+
+  /* Normalize the Unicode string.  */
+  norms_length = sizeof (normsbuf) / sizeof (UNIT);
+  norms = U_NORMALIZE (nf, s, n, normsbuf, &norms_length);
+  if (norms == NULL)
+    /* errno is set here.  */
+    return NULL;
+
+  /* Convert it to locale encoding.  */
+  convs = convsbuf;
+  convs_length = sizeof (convsbuf) - 1;
+  ret = U_CONV_TO_ENCODING (locale_charset (),
+                            iconveh_error,
+                            norms, norms_length,
+                            NULL,
+                            &convs, &convs_length);
+  if (ret < 0)
+    {
+      if (norms != normsbuf)
+        {
+          int saved_errno = errno;
+          free (norms);
+          errno = saved_errno;
+        }
+      return NULL;
+    }
+
+  if (norms != normsbuf)
+    free (norms);
+
+  /* Ensure one more byte is available.  When convs is still convsbuf this
+     already holds, since convs_length was limited to sizeof (convsbuf) - 1.  */
+  if (convs != convsbuf)
+    {
+      char *memory = (char *) realloc (convs, convs_length + 1);
+      if (memory == NULL)
+        {
+          free (convs);
+          errno = ENOMEM;
+          return NULL;
+        }
+      convs = memory;
+    }
+
+  /* Apply locale dependent transformations for comparison.  */
+  result = memxfrm (convs, convs_length, resultbuf, lengthp);
+  if (result == NULL)
+    {
+      if (convs != convsbuf)
+        {
+          int saved_errno = errno;
+          free (convs);
+          errno = saved_errno;
+        }
+      return NULL;
+    }
+
+  if (convs != convsbuf)
+    free (convs);
+  return result;
+}
diff -r 70ae78ad1609 -r 9b0911f833ac lib/uninorm/u8-normxfrm.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/uninorm/u8-normxfrm.c	Sat Mar 07 16:37:25 2009 +0100
@@ -0,0 +1,34 @@
+/* Locale dependent transformation for comparison of UTF-8 strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "uninorm.h"
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "localcharset.h"
+#include "uniconv.h"
+#include "memxfrm.h"
+
+#define FUNC u8_normxfrm
+#define UNIT uint8_t
+#define U_NORMALIZE u8_normalize
+#define U_CONV_TO_ENCODING u8_conv_to_encoding
+#include "u-normxfrm.h"
diff -r 70ae78ad1609 -r 9b0911f833ac modules/uninorm/u8-normxfrm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modules/uninorm/u8-normxfrm	Sat Mar 07 16:37:25 2009 +0100
@@ -0,0 +1,28 @@
+Description:
+Locale dependent transformation for comparison of UTF-8 strings.
+
+Files:
+lib/uninorm/u8-normxfrm.c
+lib/uninorm/u-normxfrm.h
+
+Depends-on:
+uninorm/base
+uninorm/u8-normalize
+uniconv/u8-conv-to-enc
+localcharset
+memxfrm
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += uninorm/u8-normxfrm.c
+
+Include:
+"uninorm.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+