Mercurial > gnulib
annotate lib/uninorm/composition.c @ 40057:b06060465f09
maint: Run 'make update-copyright'
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Tue, 01 Jan 2019 00:25:11 +0100 |
parents | 10eb9086bea0 |
children |
rev | line source |
---|---|
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
1 /* Canonical composition of Unicode characters. |
40057
b06060465f09
maint: Run 'make update-copyright'
Paul Eggert <eggert@cs.ucla.edu>
parents:
19484
diff
changeset
|
2 Copyright (C) 2002, 2006, 2009, 2011-2019 Free Software Foundation, Inc. |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
3 Written by Bruno Haible <bruno@clisp.org>, 2009. |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
4 |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
5 This program is free software: you can redistribute it and/or modify it |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
6 under the terms of the GNU Lesser General Public License as published |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
7 by the Free Software Foundation; either version 3 of the License, or |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
8 (at your option) any later version. |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
9 |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
10 This program is distributed in the hope that it will be useful, |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
13 Lesser General Public License for more details. |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
14 |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
15 You should have received a copy of the GNU Lesser General Public License |
19190 | 16 along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
17 |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
18 #include <config.h> |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
19 |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
20 /* Specification. */ |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
21 #include "uninorm.h" |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
22 |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
23 #include <string.h> |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
24 |
/* One entry of the composition table.  */
struct composition_rule
{
  /* Lookup key, six bytes.  uc_composition fills it with the three
     low-order bytes of the first code point followed by the three
     low-order bytes of the second one, most significant byte first.  */
  char codes[6];
  /* Value that uc_composition returns when the key matches.  */
  unsigned int combined;
};
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
26 |
11542 | 27 #include "composition-table.h" |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
28 |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
29 ucs4_t |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
30 uc_composition (ucs4_t uc1, ucs4_t uc2) |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
31 { |
14152 | 32 if (uc1 < 0x12000 && uc2 < 0x12000) |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
33 { |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
34 if (uc2 >= 0x1161 && uc2 < 0x1161 + 21 |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
35 && uc1 >= 0x1100 && uc1 < 0x1100 + 19) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
36 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
37 /* Hangul: Combine single letter L and single letter V to form |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
38 two-letter syllable LV. */ |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
39 return 0xAC00 + ((uc1 - 0x1100) * 21 + (uc2 - 0x1161)) * 28; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
40 } |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
41 else if (uc2 > 0x11A7 && uc2 < 0x11A7 + 28 |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
42 && uc1 >= 0xAC00 && uc1 < 0xD7A4 && ((uc1 - 0xAC00) % 28) == 0) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
43 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
44 /* Hangul: Combine two-letter syllable LV with single-letter T |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
45 to form three-letter syllable LVT. */ |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
46 return uc1 + (uc2 - 0x11A7); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
47 } |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
48 else |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
49 { |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
50 #if 0 |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
51 unsigned int uc = MUL1 * uc1 * MUL2 * uc2; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
52 unsigned int index1 = uc >> composition_header_0; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
53 if (index1 < composition_header_1) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
54 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
55 int lookup1 = u_composition.level1[index1]; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
56 if (lookup1 >= 0) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
57 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
58 unsigned int index2 = (uc >> composition_header_2) & composition_header_3; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
59 int lookup2 = u_composition.level2[lookup1 + index2]; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
60 if (lookup2 >= 0) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
61 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
62 unsigned int index3 = (uc & composition_header_4); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
63 unsigned int lookup3 = u_composition.level3[lookup2 + index3]; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
64 if ((lookup3 >> 16) == uc2) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
65 return lookup3 & ((1U << 16) - 1); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
66 } |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
67 } |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
68 } |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
69 #else |
14152 | 70 char codes[6]; |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
71 const struct composition_rule *rule; |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
72 |
14152 | 73 codes[0] = (uc1 >> 16) & 0xff; |
74 codes[1] = (uc1 >> 8) & 0xff; | |
75 codes[2] = uc1 & 0xff; | |
76 codes[3] = (uc2 >> 16) & 0xff; | |
77 codes[4] = (uc2 >> 8) & 0xff; | |
78 codes[5] = uc2 & 0xff; | |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
79 |
14152 | 80 rule = gl_uninorm_compose_lookup (codes, 6); |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
81 if (rule != NULL) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
82 return rule->combined; |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
83 #endif |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11542
diff
changeset
|
84 } |
11185
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
85 } |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
86 return 0; |
0849684e41dd
New module 'uninorm/composition'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
87 } |