11193
|
1 /* Test of Unicode compliance of normalization of UTF-32 strings. |
|
2 Copyright (C) 2009 Free Software Foundation, Inc. |
|
3 |
|
4 This program is free software: you can redistribute it and/or modify |
|
5 it under the terms of the GNU General Public License as published by |
|
6 the Free Software Foundation; either version 3 of the License, or |
|
7 (at your option) any later version. |
|
8 |
|
9 This program is distributed in the hope that it will be useful, |
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 GNU General Public License for more details. |
|
13 |
|
14 You should have received a copy of the GNU General Public License |
|
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
|
16 |
|
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */ |
|
18 |
|
19 #include <config.h> |
|
20 |
|
21 /* Specification. */ |
|
22 #include "test-u32-normalize-big.h" |
|
23 |
|
24 #if GNULIB_UNINORM_U32_NORMALIZE |
|
25 |
|
26 #include <stdio.h> |
|
27 #include <stdlib.h> |
|
28 |
|
29 #include "xalloc.h" |
|
30 #include "unistr.h" |
|
31 |
|
/* ASSERT (EXPR): when EXPR is false, report the location of the failing
   check (this source file and line) on stderr, flush stderr, and abort.
   EXPR is evaluated exactly once.  */
#define ASSERT(expr) \
  do                                                                         \
    {                                                                        \
      if (expr)                                                              \
        break; /* Check holds; leaves only this do/while wrapper.  */       \
      fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__);    \
      fflush (stderr);                                                       \
      abort ();                                                              \
    }                                                                        \
  while (0)

/* ASSERT_WITH_LINE (EXPR, FILE, LINE): like ASSERT, but the diagnostic also
   names the data file FILE (a string, printed with %s) and its line number
   LINE (printed with %u) that the failing check refers to.  */
#define ASSERT_WITH_LINE(expr, file, line) \
  do                                                                         \
    {                                                                        \
      if (expr)                                                              \
        break; /* Check holds; leaves only this do/while wrapper.  */       \
      fprintf (stderr, "%s:%d: assertion failed for %s:%u\n",               \
               __FILE__, __LINE__, file, line);                              \
      fflush (stderr);                                                       \
      abort ();                                                              \
    }                                                                        \
  while (0)
|
56 |
|
57 static int |
|
58 cmp_ucs4_t (const void *a, const void *b) |
|
59 { |
|
60 ucs4_t a_value = *(const ucs4_t *)a; |
|
61 ucs4_t b_value = *(const ucs4_t *)b; |
|
62 return (a_value < b_value ? -1 : a_value > b_value ? 1 : 0); |
|
63 } |
|
64 |
|
65 void |
|
66 read_normalization_test_file (const char *filename, |
|
67 struct normalization_test_file *file) |
|
68 { |
|
69 FILE *stream; |
|
70 unsigned int lineno; |
|
71 int part_index; |
|
72 struct normalization_test_line *lines; |
|
73 size_t lines_length; |
|
74 size_t lines_allocated; |
|
75 |
|
76 stream = fopen (filename, "r"); |
|
77 if (stream == NULL) |
|
78 { |
|
79 fprintf (stderr, "error during fopen of '%s'\n", filename); |
|
80 exit (1); |
|
81 } |
|
82 |
|
83 for (part_index = 0; part_index < 4; part_index++) |
|
84 { |
|
85 file->parts[part_index].lines = NULL; |
|
86 file->parts[part_index].lines_length = 0; |
|
87 } |
|
88 |
|
89 lineno = 0; |
|
90 |
|
91 part_index = -1; |
|
92 lines = NULL; |
|
93 lines_length = 0; |
|
94 lines_allocated = 0; |
|
95 |
|
96 for (;;) |
|
97 { |
|
98 char buf[1000+1]; |
|
99 char *ptr; |
|
100 int c; |
|
101 struct normalization_test_line line; |
|
102 size_t sequence_index; |
|
103 |
|
104 lineno++; |
|
105 |
|
106 /* Read a line. */ |
|
107 ptr = buf; |
|
108 do |
|
109 { |
|
110 c = getc (stream); |
|
111 if (c == EOF || c == '\n') |
|
112 break; |
|
113 *ptr++ = c; |
|
114 } |
|
115 while (ptr < buf + 1000); |
|
116 *ptr = '\0'; |
|
117 if (c == EOF) |
|
118 break; |
|
119 |
|
120 /* Ignore empty lines and comment lines. */ |
|
121 if (buf[0] == '\0' || buf[0] == '#') |
|
122 continue; |
|
123 |
|
124 /* Handle lines that introduce a new part. */ |
|
125 if (buf[0] == '@') |
|
126 { |
|
127 /* Switch to the next part. */ |
|
128 if (part_index >= 0) |
|
129 { |
|
130 lines = |
|
131 (struct normalization_test_line *) |
|
132 xnrealloc (lines, lines_length, sizeof (struct normalization_test_line)); |
|
133 file->parts[part_index].lines = lines; |
|
134 file->parts[part_index].lines_length = lines_length; |
|
135 } |
|
136 part_index++; |
|
137 lines = NULL; |
|
138 lines_length = 0; |
|
139 lines_allocated = 0; |
|
140 continue; |
|
141 } |
|
142 |
|
143 /* It's a line containing 5 sequences of Unicode characters. |
|
144 Parse it and append it to the current part. */ |
|
145 if (!(part_index >= 0 && part_index < 4)) |
|
146 { |
|
147 fprintf (stderr, "unexpected structure of '%s'\n", filename); |
|
148 exit (1); |
|
149 } |
|
150 ptr = buf; |
|
151 line.lineno = lineno; |
|
152 for (sequence_index = 0; sequence_index < 5; sequence_index++) |
|
153 line.sequences[sequence_index] = NULL; |
|
154 for (sequence_index = 0; sequence_index < 5; sequence_index++) |
|
155 { |
|
156 uint32_t *sequence = XNMALLOC (1, uint32_t); |
|
157 size_t sequence_length = 0; |
|
158 |
|
159 for (;;) |
|
160 { |
|
161 char *endptr; |
|
162 unsigned int uc; |
|
163 |
|
164 uc = strtoul (ptr, &endptr, 16); |
|
165 if (endptr == ptr) |
|
166 break; |
|
167 ptr = endptr; |
|
168 |
|
169 /* Append uc to the sequence. */ |
|
170 sequence = |
|
171 (uint32_t *) |
|
172 xnrealloc (sequence, sequence_length + 2, sizeof (uint32_t)); |
|
173 sequence[sequence_length] = uc; |
|
174 sequence_length++; |
|
175 |
|
176 if (*ptr == ' ') |
|
177 ptr++; |
|
178 } |
|
179 if (sequence_length == 0) |
|
180 { |
|
181 fprintf (stderr, "empty character sequence in '%s'\n", filename); |
|
182 exit (1); |
|
183 } |
|
184 sequence[sequence_length] = 0; /* terminator */ |
|
185 |
|
186 line.sequences[sequence_index] = sequence; |
|
187 |
|
188 if (*ptr != ';') |
|
189 { |
|
190 fprintf (stderr, "error parsing '%s'\n", filename); |
|
191 exit (1); |
|
192 } |
|
193 ptr++; |
|
194 } |
|
195 |
|
196 /* Append the line to the current part. */ |
|
197 if (lines_length == lines_allocated) |
|
198 { |
|
199 lines_allocated = 2 * lines_allocated; |
|
200 if (lines_allocated < 7) |
|
201 lines_allocated = 7; |
|
202 lines = |
|
203 (struct normalization_test_line *) |
|
204 xnrealloc (lines, lines_allocated, sizeof (struct normalization_test_line)); |
|
205 } |
|
206 lines[lines_length] = line; |
|
207 lines_length++; |
|
208 } |
|
209 |
|
210 if (part_index >= 0) |
|
211 { |
|
212 lines = |
|
213 (struct normalization_test_line *) |
|
214 xnrealloc (lines, lines_length, sizeof (struct normalization_test_line)); |
|
215 file->parts[part_index].lines = lines; |
|
216 file->parts[part_index].lines_length = lines_length; |
|
217 } |
|
218 |
|
219 { |
|
220 /* Collect all c1 values from the part 1 in an array. */ |
|
221 const struct normalization_test_part *p = &file->parts[1]; |
|
222 ucs4_t *c1_array = XNMALLOC (p->lines_length + 1, ucs4_t); |
|
223 size_t line_index; |
|
224 |
|
225 for (line_index = 0; line_index < p->lines_length; line_index++) |
|
226 { |
|
227 const unsigned int *sequence = p->lines[line_index].sequences[0]; |
|
228 /* In part 1, every sequences[0] consists of a single character. */ |
|
229 if (!(sequence[0] != 0 && sequence[1] == 0)) |
|
230 abort (); |
|
231 c1_array[line_index] = sequence[0]; |
|
232 } |
|
233 |
|
234 /* Sort this array. */ |
|
235 qsort (c1_array, p->lines_length, sizeof (ucs4_t), cmp_ucs4_t); |
|
236 |
|
237 /* Add the sentinel at the end. */ |
|
238 c1_array[p->lines_length] = 0x110000; |
|
239 |
|
240 file->part1_c1_sorted = c1_array; |
|
241 } |
|
242 |
|
243 file->filename = xstrdup (filename); |
|
244 |
|
245 if (ferror (stream) || fclose (stream)) |
|
246 { |
|
247 fprintf (stderr, "error reading from '%s'\n", filename); |
|
248 exit (1); |
|
249 } |
|
250 } |
|
251 |
|
252 void |
|
253 test_specific (const struct normalization_test_file *file, |
|
254 int (*check) (const uint32_t *c1, size_t c1_length, |
|
255 const uint32_t *c2, size_t c2_length, |
|
256 const uint32_t *c3, size_t c3_length, |
|
257 const uint32_t *c4, size_t c4_length, |
|
258 const uint32_t *c5, size_t c5_length)) |
|
259 { |
|
260 size_t part_index; |
|
261 |
|
262 for (part_index = 0; part_index < 4; part_index++) |
|
263 { |
|
264 const struct normalization_test_part *p = &file->parts[part_index]; |
|
265 size_t line_index; |
|
266 |
|
267 for (line_index = 0; line_index < p->lines_length; line_index++) |
|
268 { |
|
269 const struct normalization_test_line *l = &p->lines[line_index]; |
|
270 |
|
271 ASSERT_WITH_LINE (check (l->sequences[0], u32_strlen (l->sequences[0]), |
|
272 l->sequences[1], u32_strlen (l->sequences[1]), |
|
273 l->sequences[2], u32_strlen (l->sequences[2]), |
|
274 l->sequences[3], u32_strlen (l->sequences[3]), |
|
275 l->sequences[4], u32_strlen (l->sequences[4])) |
|
276 == 0, |
|
277 file->filename, l->lineno); |
|
278 } |
|
279 } |
|
280 } |
|
281 |
|
282 void |
|
283 test_other (const struct normalization_test_file *file, uninorm_t nf) |
|
284 { |
|
285 /* Check that for every character not listed in part 1 of the |
|
286 NormalizationTest.txt file, the character maps to itself in each |
|
287 of the four normalization forms. */ |
|
288 const ucs4_t *p = file->part1_c1_sorted; |
|
289 ucs4_t uc; |
|
290 |
|
291 for (uc = 0; uc < 0x110000; uc++) |
|
292 { |
|
293 if (uc >= 0xD800 && uc < 0xE000) |
|
294 { |
|
295 /* A surrogate, not a character. Skip uc. */ |
|
296 } |
|
297 else if (uc == *p) |
|
298 { |
|
299 /* Skip uc. */ |
|
300 p++; |
|
301 } |
|
302 else |
|
303 { |
|
304 uint32_t input[1]; |
|
305 size_t length; |
|
306 uint32_t *result; |
|
307 |
|
308 input[0] = uc; |
|
309 result = u32_normalize (nf, input, 1, NULL, &length); |
|
310 ASSERT (result != NULL && length == 1 && result[0] == uc); |
|
311 } |
|
312 } |
|
313 } |
|
314 |
|
315 #endif |