annotate lib/uniconv/u8-conv-from-enc.c @ 40212:8da9577294da

unistr/*, uniconv/*: Fix undefined behaviour. Reported by Jeffrey Walton <noloader@gmail.com>. * lib/unistr/u-cpy.h (FUNC): Don't invoke memcpy with a zero size. * lib/unistr/u-cpy-alloc.h (FUNC): Likewise. * lib/uniconv/u8-conv-from-enc.c (u8_conv_from_encoding): Likewise. * lib/uniconv/u8-conv-to-enc.c (u8_conv_to_encoding): Likewise.
author Bruno Haible <bruno@clisp.org>
date Sat, 09 Mar 2019 00:01:47 +0100
parents b06060465f09
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
1 /* Conversion to UTF-8 from legacy encodings.
40057
b06060465f09 maint: Run 'make update-copyright'
Paul Eggert <eggert@cs.ucla.edu>
parents: 19484
diff changeset
2 Copyright (C) 2002, 2006-2007, 2009-2019 Free Software Foundation, Inc.
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
3
9307
ad8a75a45dc9 Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents: 8555
diff changeset
4 This program is free software: you can redistribute it and/or modify it
ad8a75a45dc9 Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents: 8555
diff changeset
5 under the terms of the GNU Lesser General Public License as published
ad8a75a45dc9 Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents: 8555
diff changeset
6 by the Free Software Foundation; either version 3 of the License, or
ad8a75a45dc9 Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents: 8555
diff changeset
7 (at your option) any later version.
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
8
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
9 This program is distributed in the hope that it will be useful,
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9307
ad8a75a45dc9 Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents: 8555
diff changeset
12 Lesser General Public License for more details.
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
13
9307
ad8a75a45dc9 Change copyright notice from LGPLv2.0+ to LGPLv3+.
Bruno Haible <bruno@clisp.org>
parents: 8555
diff changeset
14 You should have received a copy of the GNU Lesser General Public License
19190
9759915b2aca all: prefer https: URLs
Paul Eggert <eggert@cs.ucla.edu>
parents: 18626
diff changeset
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
16
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
17 /* Written by Bruno Haible <bruno@clisp.org>. */
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
18
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
19 #include <config.h>
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
20
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
21 /* Specification. */
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
22 #include "uniconv.h"
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
23
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
24 #include <errno.h>
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
25 #include <stdlib.h>
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
26 #include <string.h>
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
27
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
28 #include "c-strcaseeq.h"
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
29 #include "striconveha.h"
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
30 #include "unistr.h"
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
31
11511
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
32 uint8_t *
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
33 u8_conv_from_encoding (const char *fromcode,
12421
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
34 enum iconv_ilseq_handler handler,
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
35 const char *src, size_t srclen,
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
36 size_t *offsets,
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
37 uint8_t *resultbuf, size_t *lengthp)
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
38 {
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
39 if (STRCASEEQ (fromcode, "UTF-8", 'U','T','F','-','8',0,0,0,0))
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
40 {
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
41 /* Conversion from UTF-8 to UTF-8. No need to go through iconv(). */
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
42 uint8_t *result;
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
43
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
44 if (u8_check ((const uint8_t *) src, srclen))
12421
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
45 {
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
46 errno = EILSEQ;
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
47 return NULL;
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
48 }
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
49
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
50 if (offsets != NULL)
12421
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
51 {
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
52 size_t i;
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
53
12421
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
54 for (i = 0; i < srclen; )
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
55 {
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
56 int count = u8_mblen ((const uint8_t *) src + i, srclen - i);
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
57 /* We can rely on count > 0 because of the previous u8_check. */
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
58 if (count <= 0)
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
59 abort ();
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
60 offsets[i] = i;
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
61 i++;
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
62 while (--count > 0)
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
63 offsets[i++] = (size_t)(-1);
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
64 }
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
65 }
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
66
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
67 /* Memory allocation. */
11511
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
68 if (resultbuf != NULL && *lengthp >= srclen)
12421
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
69 result = resultbuf;
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
70 else
12421
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
71 {
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
72 result = (uint8_t *) malloc (srclen > 0 ? srclen : 1);
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
73 if (result == NULL)
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
74 {
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
75 errno = ENOMEM;
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
76 return NULL;
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
77 }
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
78 }
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
79
40212
8da9577294da unistr/*, uniconv/*: Fix undefined behaviour.
Bruno Haible <bruno@clisp.org>
parents: 40057
diff changeset
80 if (srclen > 0)
8da9577294da unistr/*, uniconv/*: Fix undefined behaviour.
Bruno Haible <bruno@clisp.org>
parents: 40057
diff changeset
81 memcpy ((char *) result, src, srclen);
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
82 *lengthp = srclen;
11511
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
83 return result;
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
84 }
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
85 else
11511
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
86 {
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
87 char *result = (char *) resultbuf;
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
88 size_t length = *lengthp;
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
89
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
90 if (mem_iconveha (src, srclen, fromcode, "UTF-8", true, handler,
12421
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
91 offsets, &result, &length) < 0)
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
92 return NULL;
11511
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
93
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
94 if (result == NULL) /* when (resultbuf == NULL && length == 0) */
12421
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
95 {
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
96 result = (char *) malloc (1);
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
97 if (result == NULL)
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
98 {
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
99 errno = ENOMEM;
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
100 return NULL;
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
101 }
e8d2c6fc33ad Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents: 11511
diff changeset
102 }
11511
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
103 *lengthp = length;
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
104 return (uint8_t *) result;
d604b921ed8d Simplify calling convention of u*_conv_from_encoding.
Bruno Haible <bruno@clisp.org>
parents: 11508
diff changeset
105 }
7946
a972bb402af3 New module 'uniconv/u8-conv-from-enc'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
106 }