comparison lib/unilbrk/u32-possible-linebreaks.c @ 40213:cc3fed3b7788

unilbrk/u*-possible-linebreaks: Fix undefined behaviour.
Reported by Jeffrey Walton <noloader@gmail.com>.
* lib/unilbrk/u8-possible-linebreaks.c (u8_possible_linebreaks): Don't invoke memset with a zero size.
* lib/unilbrk/u16-possible-linebreaks.c (u16_possible_linebreaks): Likewise.
* lib/unilbrk/u32-possible-linebreaks.c (u32_possible_linebreaks): Adjust accordingly.
author Bruno Haible <bruno@clisp.org>
date Sat, 09 Mar 2019 00:27:19 +0100
parents b06060465f09
children
comparison
equal deleted inserted replaced
40212:8da9577294da 40213:cc3fed3b7788
26 #include "uniwidth/cjk.h" 26 #include "uniwidth/cjk.h"
27 27
28 void 28 void
29 u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char *p) 29 u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char *p)
30 { 30 {
31 int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL); 31 if (n > 0)
32 const uint32_t *s_end = s + n; 32 {
33 int last_prop = LBP_BK; /* line break property of last non-space character */ 33 int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL);
34 char *seen_space = NULL; /* Was a space seen after the last non-space character? */ 34 const uint32_t *s_end = s + n;
35 char *seen_space2 = NULL; /* At least two spaces after the last non-space? */ 35 int last_prop = LBP_BK; /* line break property of last non-space character */
36 char *seen_space = NULL; /* Was a space seen after the last non-space character? */
37 char *seen_space2 = NULL; /* At least two spaces after the last non-space? */
36 38
37 while (s < s_end) 39 do
38 { 40 {
39 ucs4_t uc = *s; 41 ucs4_t uc = *s;
40 int prop = unilbrkprop_lookup (uc); 42 int prop = unilbrkprop_lookup (uc);
41 43
42 if (prop == LBP_BK) 44 if (prop == LBP_BK)
43 {
44 /* Mandatory break. */
45 *p = UC_BREAK_MANDATORY;
46 last_prop = LBP_BK;
47 seen_space = NULL;
48 seen_space2 = NULL;
49 }
50 else
51 {
52 char *q;
53
54 /* Resolve property values whose behaviour is not fixed. */
55 switch (prop)
56 { 45 {
57 case LBP_AI: 46 /* Mandatory break. */
58 /* Resolve ambiguous. */ 47 *p = UC_BREAK_MANDATORY;
59 prop = LBP_AI_REPLACEMENT; 48 last_prop = LBP_BK;
60 break;
61 case LBP_CB:
62 /* This is arbitrary. */
63 prop = LBP_ID;
64 break;
65 case LBP_SA:
66 /* We don't handle complex scripts yet.
67 Treat LBP_SA like LBP_XX. */
68 case LBP_XX:
69 /* This is arbitrary. */
70 prop = LBP_AL;
71 break;
72 }
73
74 /* Deal with spaces and combining characters. */
75 q = p;
76 if (prop == LBP_SP)
77 {
78 /* Don't break just before a space. */
79 *p = UC_BREAK_PROHIBITED;
80 seen_space2 = seen_space;
81 seen_space = p;
82 }
83 else if (prop == LBP_ZW)
84 {
85 /* Don't break just before a zero-width space. */
86 *p = UC_BREAK_PROHIBITED;
87 last_prop = LBP_ZW;
88 seen_space = NULL; 49 seen_space = NULL;
89 seen_space2 = NULL; 50 seen_space2 = NULL;
90 } 51 }
91 else if (prop == LBP_CM) 52 else
92 { 53 {
93 /* Don't break just before a combining character, except immediately after a 54 char *q;
94 zero-width space. */ 55
95 if (last_prop == LBP_ZW) 56 /* Resolve property values whose behaviour is not fixed. */
57 switch (prop)
96 { 58 {
97 /* Break after zero-width space. */ 59 case LBP_AI:
98 *p = UC_BREAK_POSSIBLE; 60 /* Resolve ambiguous. */
99 /* A combining character turns a preceding space into LBP_ID. */ 61 prop = LBP_AI_REPLACEMENT;
100 last_prop = LBP_ID; 62 break;
63 case LBP_CB:
64 /* This is arbitrary. */
65 prop = LBP_ID;
66 break;
67 case LBP_SA:
68 /* We don't handle complex scripts yet.
69 Treat LBP_SA like LBP_XX. */
70 case LBP_XX:
71 /* This is arbitrary. */
72 prop = LBP_AL;
73 break;
74 }
75
76 /* Deal with spaces and combining characters. */
77 q = p;
78 if (prop == LBP_SP)
79 {
80 /* Don't break just before a space. */
81 *p = UC_BREAK_PROHIBITED;
82 seen_space2 = seen_space;
83 seen_space = p;
84 }
85 else if (prop == LBP_ZW)
86 {
87 /* Don't break just before a zero-width space. */
88 *p = UC_BREAK_PROHIBITED;
89 last_prop = LBP_ZW;
90 seen_space = NULL;
91 seen_space2 = NULL;
92 }
93 else if (prop == LBP_CM)
94 {
95 /* Don't break just before a combining character, except immediately
96 after a zero-width space. */
97 if (last_prop == LBP_ZW)
98 {
99 /* Break after zero-width space. */
100 *p = UC_BREAK_POSSIBLE;
101 /* A combining character turns a preceding space into LBP_ID. */
102 last_prop = LBP_ID;
103 }
104 else
105 {
106 *p = UC_BREAK_PROHIBITED;
107 /* A combining character turns a preceding space into LBP_ID. */
108 if (seen_space != NULL)
109 {
110 q = seen_space;
111 seen_space = seen_space2;
112 prop = LBP_ID;
113 goto lookup_via_table;
114 }
115 }
101 } 116 }
102 else 117 else
103 { 118 {
104 *p = UC_BREAK_PROHIBITED; 119 lookup_via_table:
105 /* A combining character turns a preceding space into LBP_ID. */ 120 /* prop must be usable as an index for table 7.3 of UTR #14. */
106 if (seen_space != NULL) 121 if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0])))
122 abort ();
123
124 if (last_prop == LBP_BK)
107 { 125 {
108 q = seen_space; 126 /* Don't break at the beginning of a line. */
109 seen_space = seen_space2; 127 *q = UC_BREAK_PROHIBITED;
110 prop = LBP_ID;
111 goto lookup_via_table;
112 } 128 }
129 else if (last_prop == LBP_ZW)
130 {
131 /* Break after zero-width space. */
132 *q = UC_BREAK_POSSIBLE;
133 }
134 else
135 {
136 switch (unilbrk_table [last_prop] [prop])
137 {
138 case D:
139 *q = UC_BREAK_POSSIBLE;
140 break;
141 case I:
142 *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED);
143 break;
144 case P:
145 *q = UC_BREAK_PROHIBITED;
146 break;
147 default:
148 abort ();
149 }
150 }
151 last_prop = prop;
152 seen_space = NULL;
153 seen_space2 = NULL;
113 } 154 }
114 } 155 }
115 else
116 {
117 lookup_via_table:
118 /* prop must be usable as an index for table 7.3 of UTR #14. */
119 if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0])))
120 abort ();
121 156
122 if (last_prop == LBP_BK) 157 s++;
123 { 158 p++;
124 /* Don't break at the beginning of a line. */
125 *q = UC_BREAK_PROHIBITED;
126 }
127 else if (last_prop == LBP_ZW)
128 {
129 /* Break after zero-width space. */
130 *q = UC_BREAK_POSSIBLE;
131 }
132 else
133 {
134 switch (unilbrk_table [last_prop] [prop])
135 {
136 case D:
137 *q = UC_BREAK_POSSIBLE;
138 break;
139 case I:
140 *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED);
141 break;
142 case P:
143 *q = UC_BREAK_PROHIBITED;
144 break;
145 default:
146 abort ();
147 }
148 }
149 last_prop = prop;
150 seen_space = NULL;
151 seen_space2 = NULL;
152 }
153 } 159 }
154 160 while (s < s_end);
155 s++;
156 p++;
157 } 161 }
158 } 162 }