Mercurial > gnulib
comparison lib/unilbrk/u32-possible-linebreaks.c @ 40213:cc3fed3b7788
unilbrk/u*-possible-linebreaks: Fix undefined behaviour.
Reported by Jeffrey Walton <noloader@gmail.com>.
* lib/unilbrk/u8-possible-linebreaks.c (u8_possible_linebreaks): Don't
invoke memset with a zero size.
* lib/unilbrk/u16-possible-linebreaks.c (u16_possible_linebreaks):
Likewise.
* lib/unilbrk/u32-possible-linebreaks.c (u32_possible_linebreaks):
Adjust accordingly.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Sat, 09 Mar 2019 00:27:19 +0100 |
parents | b06060465f09 |
children |
comparison
legend: equal | deleted | inserted | replaced
40212:8da9577294da | 40213:cc3fed3b7788 |
---|---|
26 #include "uniwidth/cjk.h" | 26 #include "uniwidth/cjk.h" |
27 | 27 |
28 void | 28 void |
29 u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char *p) | 29 u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char *p) |
30 { | 30 { |
31 int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL); | 31 if (n > 0) |
32 const uint32_t *s_end = s + n; | 32 { |
33 int last_prop = LBP_BK; /* line break property of last non-space character */ | 33 int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL); |
34 char *seen_space = NULL; /* Was a space seen after the last non-space character? */ | 34 const uint32_t *s_end = s + n; |
35 char *seen_space2 = NULL; /* At least two spaces after the last non-space? */ | 35 int last_prop = LBP_BK; /* line break property of last non-space character */ |
36 char *seen_space = NULL; /* Was a space seen after the last non-space character? */ | |
37 char *seen_space2 = NULL; /* At least two spaces after the last non-space? */ | |
36 | 38 |
37 while (s < s_end) | 39 do |
38 { | 40 { |
39 ucs4_t uc = *s; | 41 ucs4_t uc = *s; |
40 int prop = unilbrkprop_lookup (uc); | 42 int prop = unilbrkprop_lookup (uc); |
41 | 43 |
42 if (prop == LBP_BK) | 44 if (prop == LBP_BK) |
43 { | |
44 /* Mandatory break. */ | |
45 *p = UC_BREAK_MANDATORY; | |
46 last_prop = LBP_BK; | |
47 seen_space = NULL; | |
48 seen_space2 = NULL; | |
49 } | |
50 else | |
51 { | |
52 char *q; | |
53 | |
54 /* Resolve property values whose behaviour is not fixed. */ | |
55 switch (prop) | |
56 { | 45 { |
57 case LBP_AI: | 46 /* Mandatory break. */ |
58 /* Resolve ambiguous. */ | 47 *p = UC_BREAK_MANDATORY; |
59 prop = LBP_AI_REPLACEMENT; | 48 last_prop = LBP_BK; |
60 break; | |
61 case LBP_CB: | |
62 /* This is arbitrary. */ | |
63 prop = LBP_ID; | |
64 break; | |
65 case LBP_SA: | |
66 /* We don't handle complex scripts yet. | |
67 Treat LBP_SA like LBP_XX. */ | |
68 case LBP_XX: | |
69 /* This is arbitrary. */ | |
70 prop = LBP_AL; | |
71 break; | |
72 } | |
73 | |
74 /* Deal with spaces and combining characters. */ | |
75 q = p; | |
76 if (prop == LBP_SP) | |
77 { | |
78 /* Don't break just before a space. */ | |
79 *p = UC_BREAK_PROHIBITED; | |
80 seen_space2 = seen_space; | |
81 seen_space = p; | |
82 } | |
83 else if (prop == LBP_ZW) | |
84 { | |
85 /* Don't break just before a zero-width space. */ | |
86 *p = UC_BREAK_PROHIBITED; | |
87 last_prop = LBP_ZW; | |
88 seen_space = NULL; | 49 seen_space = NULL; |
89 seen_space2 = NULL; | 50 seen_space2 = NULL; |
90 } | 51 } |
91 else if (prop == LBP_CM) | 52 else |
92 { | 53 { |
93 /* Don't break just before a combining character, except immediately after a | 54 char *q; |
94 zero-width space. */ | 55 |
95 if (last_prop == LBP_ZW) | 56 /* Resolve property values whose behaviour is not fixed. */ |
57 switch (prop) | |
96 { | 58 { |
97 /* Break after zero-width space. */ | 59 case LBP_AI: |
98 *p = UC_BREAK_POSSIBLE; | 60 /* Resolve ambiguous. */ |
99 /* A combining character turns a preceding space into LBP_ID. */ | 61 prop = LBP_AI_REPLACEMENT; |
100 last_prop = LBP_ID; | 62 break; |
63 case LBP_CB: | |
64 /* This is arbitrary. */ | |
65 prop = LBP_ID; | |
66 break; | |
67 case LBP_SA: | |
68 /* We don't handle complex scripts yet. | |
69 Treat LBP_SA like LBP_XX. */ | |
70 case LBP_XX: | |
71 /* This is arbitrary. */ | |
72 prop = LBP_AL; | |
73 break; | |
74 } | |
75 | |
76 /* Deal with spaces and combining characters. */ | |
77 q = p; | |
78 if (prop == LBP_SP) | |
79 { | |
80 /* Don't break just before a space. */ | |
81 *p = UC_BREAK_PROHIBITED; | |
82 seen_space2 = seen_space; | |
83 seen_space = p; | |
84 } | |
85 else if (prop == LBP_ZW) | |
86 { | |
87 /* Don't break just before a zero-width space. */ | |
88 *p = UC_BREAK_PROHIBITED; | |
89 last_prop = LBP_ZW; | |
90 seen_space = NULL; | |
91 seen_space2 = NULL; | |
92 } | |
93 else if (prop == LBP_CM) | |
94 { | |
95 /* Don't break just before a combining character, except immediately | |
96 after a zero-width space. */ | |
97 if (last_prop == LBP_ZW) | |
98 { | |
99 /* Break after zero-width space. */ | |
100 *p = UC_BREAK_POSSIBLE; | |
101 /* A combining character turns a preceding space into LBP_ID. */ | |
102 last_prop = LBP_ID; | |
103 } | |
104 else | |
105 { | |
106 *p = UC_BREAK_PROHIBITED; | |
107 /* A combining character turns a preceding space into LBP_ID. */ | |
108 if (seen_space != NULL) | |
109 { | |
110 q = seen_space; | |
111 seen_space = seen_space2; | |
112 prop = LBP_ID; | |
113 goto lookup_via_table; | |
114 } | |
115 } | |
101 } | 116 } |
102 else | 117 else |
103 { | 118 { |
104 *p = UC_BREAK_PROHIBITED; | 119 lookup_via_table: |
105 /* A combining character turns a preceding space into LBP_ID. */ | 120 /* prop must be usable as an index for table 7.3 of UTR #14. */ |
106 if (seen_space != NULL) | 121 if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0]))) |
122 abort (); | |
123 | |
124 if (last_prop == LBP_BK) | |
107 { | 125 { |
108 q = seen_space; | 126 /* Don't break at the beginning of a line. */ |
109 seen_space = seen_space2; | 127 *q = UC_BREAK_PROHIBITED; |
110 prop = LBP_ID; | |
111 goto lookup_via_table; | |
112 } | 128 } |
129 else if (last_prop == LBP_ZW) | |
130 { | |
131 /* Break after zero-width space. */ | |
132 *q = UC_BREAK_POSSIBLE; | |
133 } | |
134 else | |
135 { | |
136 switch (unilbrk_table [last_prop] [prop]) | |
137 { | |
138 case D: | |
139 *q = UC_BREAK_POSSIBLE; | |
140 break; | |
141 case I: | |
142 *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED); | |
143 break; | |
144 case P: | |
145 *q = UC_BREAK_PROHIBITED; | |
146 break; | |
147 default: | |
148 abort (); | |
149 } | |
150 } | |
151 last_prop = prop; | |
152 seen_space = NULL; | |
153 seen_space2 = NULL; | |
113 } | 154 } |
114 } | 155 } |
115 else | |
116 { | |
117 lookup_via_table: | |
118 /* prop must be usable as an index for table 7.3 of UTR #14. */ | |
119 if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0]))) | |
120 abort (); | |
121 | 156 |
122 if (last_prop == LBP_BK) | 157 s++; |
123 { | 158 p++; |
124 /* Don't break at the beginning of a line. */ | |
125 *q = UC_BREAK_PROHIBITED; | |
126 } | |
127 else if (last_prop == LBP_ZW) | |
128 { | |
129 /* Break after zero-width space. */ | |
130 *q = UC_BREAK_POSSIBLE; | |
131 } | |
132 else | |
133 { | |
134 switch (unilbrk_table [last_prop] [prop]) | |
135 { | |
136 case D: | |
137 *q = UC_BREAK_POSSIBLE; | |
138 break; | |
139 case I: | |
140 *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED); | |
141 break; | |
142 case P: | |
143 *q = UC_BREAK_PROHIBITED; | |
144 break; | |
145 default: | |
146 abort (); | |
147 } | |
148 } | |
149 last_prop = prop; | |
150 seen_space = NULL; | |
151 seen_space2 = NULL; | |
152 } | |
153 } | 159 } |
154 | 160 while (s < s_end); |
155 s++; | |
156 p++; | |
157 } | 161 } |
158 } | 162 } |