Mercurial > gnulib
annotate lib/unictype/categ_of.c @ 40210:44073ad4207f
unictype/numeric: Fix undefined behaviour.
Reported by Jeffrey Walton <noloader@gmail.com>.
* lib/unictype/numeric.c (uc_numeric_value): Avoid undefined behaviour
on shift overflow, caught by "gcc -fsanitize=undefined".
* lib/unictype/bidi_of.c (uc_bidi_class): Add cast, for clarity.
* lib/unictype/categ_of.c (lookup_withtable): Likewise.
* lib/unictype/joininggroup_of.c (uc_joining_group): Likewise.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Fri, 08 Mar 2019 19:17:37 +0100 |
parents | b06060465f09 |
children |
rev | line source |
---|---|
9471
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
1 /* Categories of Unicode characters. |
40057
b06060465f09
maint: Run 'make update-copyright'
Paul Eggert <eggert@cs.ucla.edu>
parents:
19484
diff
changeset
|
2 Copyright (C) 2002, 2006-2007, 2009-2019 Free Software Foundation, Inc. |
9471
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
3 Written by Bruno Haible <bruno@clisp.org>, 2002. |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
4 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
5 This program is free software: you can redistribute it and/or modify it |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
6 under the terms of the GNU Lesser General Public License as published |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
7 by the Free Software Foundation; either version 3 of the License, or |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
8 (at your option) any later version. |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
9 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
10 This program is distributed in the hope that it will be useful, |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
13 Lesser General Public License for more details. |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
14 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
15 You should have received a copy of the GNU Lesser General Public License |
19190 | 16 along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
9471
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
17 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
18 #include <config.h> |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
19 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
20 /* Specification. */ |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
21 #include "unictype.h" |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
22 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
23 /* Define u_category table. */ |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
24 #include "categ_of.h" |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
25 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
26 static inline int |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
27 lookup_withtable (ucs4_t uc) |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
28 { |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
29 unsigned int index1 = uc >> category_header_0; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
30 if (index1 < category_header_1) |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
31 { |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
32 int lookup1 = u_category.level1[index1]; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
33 if (lookup1 >= 0) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
34 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
35 unsigned int index2 = (uc >> category_header_2) & category_header_3; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
36 int lookup2 = u_category.level2[lookup1 + index2]; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
37 if (lookup2 >= 0) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
38 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
39 unsigned int index3 = ((uc & category_header_4) + lookup2) * 5; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
40 /* level3 contains 5-bit values, packed into 16-bit words. */ |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
41 unsigned int lookup3 = |
40210
44073ad4207f
unictype/numeric: Fix undefined behaviour.
Bruno Haible <bruno@clisp.org>
parents:
40057
diff
changeset
|
42 (((unsigned int) u_category.level3[index3>>4] |
17880
18371cbd9692
unictype: avoid undefined left-shift behavior
Daiki Ueno <ueno@gnu.org>
parents:
17848
diff
changeset
|
43 | ((unsigned int) u_category.level3[(index3>>4)+1] << 16)) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
44 >> (index3 % 16)) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
45 & 0x1f; |
9471
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
46 |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
47 return lookup3; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
48 } |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
9471
diff
changeset
|
49 } |
9471
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
50 return 29; /* = log2(UC_CATEGORY_MASK_Cn) */ |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
51 } |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
52 return -1; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
53 } |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
54 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
55 bool |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
56 uc_is_general_category_withtable (ucs4_t uc, uint32_t bitmask) |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
57 { |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
58 int bit = lookup_withtable (uc); |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
59 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
60 if (bit >= 0) |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
61 return ((bitmask >> bit) & 1); |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
62 else |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
63 return false; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
64 } |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
65 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
66 uc_general_category_t |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
67 uc_general_category (ucs4_t uc) |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
68 { |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
69 int bit = lookup_withtable (uc); |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
70 uc_general_category_t result; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
71 |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
72 if (bit >= 0) |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
73 { |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
74 result.bitmask = 1 << bit; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
75 result.generic = 1; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
76 result.lookup.lookup_fn = &uc_is_general_category_withtable; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
77 return result; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
78 } |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
79 else |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
80 return _UC_CATEGORY_NONE; |
6dc73c76eced
Unicode character classification functions.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
81 } |