Mercurial > gnulib
annotate lib/uniconv/u8-conv-from-enc.c @ 40212:8da9577294da
unistr/*, uniconv/*: Fix undefined behaviour.
Reported by Jeffrey Walton <noloader@gmail.com>.
* lib/unistr/u-cpy.h (FUNC): Don't invoke memcpy with a zero size.
* lib/unistr/u-cpy-alloc.h (FUNC): Likewise.
* lib/uniconv/u8-conv-from-enc.c (u8_conv_from_encoding): Likewise.
* lib/uniconv/u8-conv-to-enc.c (u8_conv_to_encoding): Likewise.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Sat, 09 Mar 2019 00:01:47 +0100 |
parents | b06060465f09 |
children |
rev | line source |
---|---|
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
1 /* Conversion to UTF-8 from legacy encodings. |
40057
b06060465f09
maint: Run 'make update-copyright'
Paul Eggert <eggert@cs.ucla.edu>
parents:
19484
diff
changeset
|
2 Copyright (C) 2002, 2006-2007, 2009-2019 Free Software Foundation, Inc. |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
3 |
9307
ad8a75a45dc9
Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8555
diff
changeset
|
4 This program is free software: you can redistribute it and/or modify it |
ad8a75a45dc9
Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8555
diff
changeset
|
5 under the terms of the GNU Lesser General Public License as published |
ad8a75a45dc9
Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8555
diff
changeset
|
6 by the Free Software Foundation; either version 3 of the License, or |
ad8a75a45dc9
Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8555
diff
changeset
|
7 (at your option) any later version. |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
8 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
9 This program is distributed in the hope that it will be useful, |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
9307
ad8a75a45dc9
Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8555
diff
changeset
|
12 Lesser General Public License for more details. |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
13 |
9307
ad8a75a45dc9
Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8555
diff
changeset
|
14 You should have received a copy of the GNU Lesser General Public License |
19190 | 15 along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
16 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
17 /* Written by Bruno Haible <bruno@clisp.org>. */ |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
18 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
19 #include <config.h> |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
20 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
21 /* Specification. */ |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
22 #include "uniconv.h" |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
23 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
24 #include <errno.h> |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
25 #include <stdlib.h> |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
26 #include <string.h> |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
27 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
28 #include "c-strcaseeq.h" |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
29 #include "striconveha.h" |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
30 #include "unistr.h" |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
31 |
11511
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
32 uint8_t * |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
33 u8_conv_from_encoding (const char *fromcode, |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
34 enum iconv_ilseq_handler handler, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
35 const char *src, size_t srclen, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
36 size_t *offsets, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
37 uint8_t *resultbuf, size_t *lengthp) |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
38 { |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
39 if (STRCASEEQ (fromcode, "UTF-8", 'U','T','F','-','8',0,0,0,0)) |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
40 { |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
41 /* Conversion from UTF-8 to UTF-8. No need to go through iconv(). */ |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
42 uint8_t *result; |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
43 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
44 if (u8_check ((const uint8_t *) src, srclen)) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
45 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
46 errno = EILSEQ; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
47 return NULL; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
48 } |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
49 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
50 if (offsets != NULL) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
51 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
52 size_t i; |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
53 |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
54 for (i = 0; i < srclen; ) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
55 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
56 int count = u8_mblen ((const uint8_t *) src + i, srclen - i); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
57 /* We can rely on count > 0 because of the previous u8_check. */ |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
58 if (count <= 0) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
59 abort (); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
60 offsets[i] = i; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
61 i++; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
62 while (--count > 0) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
63 offsets[i++] = (size_t)(-1); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
64 } |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
65 } |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
66 |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
67 /* Memory allocation. */ |
11511
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
68 if (resultbuf != NULL && *lengthp >= srclen) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
69 result = resultbuf; |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
70 else |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
71 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
72 result = (uint8_t *) malloc (srclen > 0 ? srclen : 1); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
73 if (result == NULL) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
74 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
75 errno = ENOMEM; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
76 return NULL; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
77 } |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
78 } |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
79 |
40212
8da9577294da
unistr/*, uniconv/*: Fix undefined behaviour.
Bruno Haible <bruno@clisp.org>
parents:
40057
diff
changeset
|
80 if (srclen > 0) |
8da9577294da
unistr/*, uniconv/*: Fix undefined behaviour.
Bruno Haible <bruno@clisp.org>
parents:
40057
diff
changeset
|
81 memcpy ((char *) result, src, srclen); |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
82 *lengthp = srclen; |
11511
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
83 return result; |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
84 } |
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
85 else |
11511
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
86 { |
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
87 char *result = (char *) resultbuf; |
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
88 size_t length = *lengthp; |
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
89 |
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
90 if (mem_iconveha (src, srclen, fromcode, "UTF-8", true, handler, |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
91 offsets, &result, &length) < 0) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
92 return NULL; |
11511
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
93 |
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
94 if (result == NULL) /* when (resultbuf == NULL && length == 0) */ |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
95 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
96 result = (char *) malloc (1); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
97 if (result == NULL) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
98 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
99 errno = ENOMEM; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
100 return NULL; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
101 } |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11511
diff
changeset
|
102 } |
11511
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
103 *lengthp = length; |
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
104 return (uint8_t *) result; |
d604b921ed8d
Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents:
11508
diff
changeset
|
105 } |
7946
a972bb402af3
New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff
changeset
|
106 } |