Mercurial > octave
annotate liboctave/wrappers/uniconv-wrappers.c @ 32088:e2911d0176dc stable
uniconv-wrappers: Avoid freeing uninitialized pointer (bug #64182).
* liboctave/wrappers/uniconv-wrappers.c (octave_u8_conv_to_encoding_intern):
Initialize pointer with NULL that might be freed without assignment otherwise.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Fri, 12 May 2023 08:00:41 +0200 |
parents | f7206b6577c2 |
children | 212145b8e5f0 |
rev | line source |
---|---|
27923
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
1 //////////////////////////////////////////////////////////////////////// |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
2 // |
31706
597f3ee61a48
update Octave Project Developers copyright for the new year
John W. Eaton <jwe@octave.org>
parents:
30822
diff
changeset
|
3 // Copyright (C) 2017-2023 The Octave Project Developers |
27923
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
4 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
5 // See the file COPYRIGHT.md in the top-level directory of this |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
6 // distribution or <https://octave.org/copyright/>. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
7 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
8 // This file is part of Octave. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
9 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
10 // Octave is free software: you can redistribute it and/or modify it |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
11 // under the terms of the GNU General Public License as published by |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
12 // the Free Software Foundation, either version 3 of the License, or |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
13 // (at your option) any later version. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
14 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
15 // Octave is distributed in the hope that it will be useful, but |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
16 // WITHOUT ANY WARRANTY; without even the implied warranty of |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
18 // GNU General Public License for more details. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
19 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
20 // You should have received a copy of the GNU General Public License |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
21 // along with Octave; see the file COPYING. If not, see |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
22 // <https://www.gnu.org/licenses/>. |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
23 // |
bd51beb6205e
update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents:
27919
diff
changeset
|
24 //////////////////////////////////////////////////////////////////////// |
23123
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
25 |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
26 // The conversion functions are provided by gnulib. We don't include |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
27 // gnulib headers directly in Octave's C++ source files to avoid |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
28 // problems that may be caused by the way that gnulib overrides standard |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
29 // library functions. |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
30 |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
31 #if defined (HAVE_CONFIG_H) |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
32 # include "config.h" |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
33 #endif |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
34 |
25512
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
35 #include <stdlib.h> |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
36 #include <string.h> |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
37 #include <wchar.h> |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
38 |
23123
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
39 #include "uniconv.h" |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
40 |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
41 #include "uniconv-wrappers.h" |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
42 |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
43 uint8_t * |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
44 octave_u8_conv_from_encoding (const char *fromcode, const char *src, |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
45 size_t srclen, size_t *lengthp) |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
46 { |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
47 return u8_conv_from_encoding (fromcode, iconveh_question_mark, |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
48 src, srclen, NULL, NULL, lengthp); |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
49 } |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
50 |
31967
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
51 static char * |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
52 octave_u8_conv_to_encoding_intern (const char *tocode, |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
53 enum iconv_ilseq_handler handler, |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
54 const uint8_t *src, size_t srclen, |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
55 size_t *offsets, size_t *lengthp) |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
56 { |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
57 // FIXME: It looks like the input to u8_conv_to_encoding must be at least |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
58 // four bytes and zero-terminated to work correctly. Zero-pad input. |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
59 // Should this be fixed in gnulib or iconv instead? |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
60 size_t minlen = 4; |
32072
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
61 size_t padlen = (srclen > minlen ? srclen : minlen); |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
62 |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
63 // Do not zero-terminate when the output encoding is a UTF encoding, i.e., |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
64 // the surrogates are different than a byte. |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
65 if ((tocode[0] != 'u' && tocode[0] != 'U') |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
66 || (tocode[1] != 't' && tocode[1] != 'T') |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
67 || (tocode[2] != 'f' && tocode[2] != 'F')) |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
68 padlen++; |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
69 |
32088
e2911d0176dc
uniconv-wrappers: Avoid freeing uninitialized pointer (bug #64182).
Markus Mützel <markus.muetzel@gmx.de>
parents:
32072
diff
changeset
|
70 uint8_t *u8_str = NULL; |
32072
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
71 const uint8_t *cu8_str; |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
72 if (srclen < padlen) |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
73 { |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
74 u8_str = (uint8_t *) malloc (padlen); |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
75 memcpy (u8_str, src, srclen); |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
76 for (size_t i_pad = 0; i_pad < padlen-srclen; i_pad++) |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
77 u8_str[srclen+i_pad] = 0; |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
78 cu8_str = u8_str; |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
79 } |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
80 else |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
81 cu8_str = src; |
31967
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
82 |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
83 // Convert from UTF-8 to output encoding |
32072
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
84 char *ret = u8_conv_to_encoding (tocode, handler, cu8_str, padlen, |
31967
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
85 offsets, NULL, lengthp); |
32072
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
86 |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
87 if (srclen > padlen) |
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
88 free ((void *) u8_str); |
31967
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
89 |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
90 // FIXME: This assumes that "\0" is converted to a single byte. This might |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
91 // not be true for some exotic output encodings (like UTF-7?). |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
92 *lengthp = (*lengthp <= (padlen-srclen) ? 0 : *lengthp - (padlen-srclen)); |
32072
f7206b6577c2
unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31967
diff
changeset
|
93 |
31967
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
94 return ret; |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
95 } |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
96 |
25512
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
97 char * |
23123
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
98 octave_u8_conv_to_encoding (const char *tocode, const uint8_t *src, |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
99 size_t srclen, size_t *lengthp) |
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
100 { |
31967
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
101 return octave_u8_conv_to_encoding_intern (tocode, iconveh_question_mark, |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
102 src, srclen, NULL, lengthp); |
23123
c6ca5fe1505c
use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
103 } |
25512
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
104 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
105 char * |
28534
548598760b66
get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents:
27923
diff
changeset
|
106 octave_u8_conv_to_encoding_strict (const char *tocode, const uint8_t *src, |
548598760b66
get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents:
27923
diff
changeset
|
107 size_t srclen, size_t *lengthp) |
548598760b66
get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents:
27923
diff
changeset
|
108 { |
31967
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
109 return octave_u8_conv_to_encoding_intern (tocode, iconveh_error, |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
110 src, srclen, NULL, lengthp); |
28534
548598760b66
get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents:
27923
diff
changeset
|
111 } |
548598760b66
get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents:
27923
diff
changeset
|
112 |
548598760b66
get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents:
27923
diff
changeset
|
113 char * |
26331
5f10217b562d
Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25512
diff
changeset
|
114 octave_u32_conv_to_encoding_strict (const char *tocode, const uint32_t *src, |
28534
548598760b66
get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents:
27923
diff
changeset
|
115 size_t srclen, size_t *lengthp) |
26331
5f10217b562d
Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25512
diff
changeset
|
116 { |
5f10217b562d
Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25512
diff
changeset
|
117 return u32_conv_to_encoding (tocode, iconveh_error, |
28534
548598760b66
get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents:
27923
diff
changeset
|
118 src, srclen, NULL, NULL, lengthp); |
26331
5f10217b562d
Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25512
diff
changeset
|
119 } |
5f10217b562d
Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25512
diff
changeset
|
120 |
30822
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
121 uint8_t * |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
122 octave_u8_conv_from_encoding_offsets |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
123 (const char *fromcode, const char *src, size_t srclen, |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
124 size_t *offsets, size_t *lengthp) |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
125 { |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
126 return u8_conv_from_encoding (fromcode, iconveh_question_mark, |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
127 src, srclen, offsets, NULL, lengthp); |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
128 } |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
129 |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
130 char * |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
131 octave_u8_conv_to_encoding_offsets |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
132 (const char *tocode, const uint8_t *src, size_t srclen, |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
133 size_t *offsets, size_t *lengthp) |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
134 { |
31967
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
135 return octave_u8_conv_to_encoding_intern (tocode, iconveh_question_mark, |
470134b3fc28
Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents:
31706
diff
changeset
|
136 src, srclen, offsets, lengthp); |
30822
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
137 } |
0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents:
30564
diff
changeset
|
138 |
26331
5f10217b562d
Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25512
diff
changeset
|
139 char * |
25512
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
140 u8_from_wchar (const wchar_t *wc) |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
141 { |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
142 // Convert wide char array to multibyte UTF-8 char array |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
143 // The memory at the returned pointer must be freed after use. |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
144 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
145 size_t srclen = wcslen (wc) * sizeof (wchar_t); |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
146 const char *src = (const char *) wc; |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
147 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
148 size_t length = 0; |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
149 uint8_t *mbchar = u8_conv_from_encoding ("wchar_t", iconveh_question_mark, |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
150 src, srclen, NULL, NULL, &length); |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
151 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
152 // result might not be 0 terminated |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
153 char *retval = malloc (length + 1); |
26475
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
154 if (retval) |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
155 { |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
156 memcpy (retval, mbchar, length); |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
157 free ((void *) mbchar); |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
158 retval[length] = 0; // 0 terminate string |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
159 } |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
160 else |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
161 free ((void *) mbchar); |
25512
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
162 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
163 return retval; |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
164 } |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
165 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
166 wchar_t * |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
167 u8_to_wchar (const char *u8) |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
168 { |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
169 // Convert multibyte UTF-8 char array to wide char array |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
170 // The memory at the returned pointer must be freed after use. |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
171 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
172 size_t srclen = strlen (u8); |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
173 const uint8_t *src = (const uint8_t *) u8; |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
174 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
175 size_t length = 0; |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
176 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
177 char *wchar = u8_conv_to_encoding ("wchar_t", iconveh_question_mark, |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
178 src, srclen, NULL, NULL, &length); |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
179 // result might not be 0 terminated |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
180 wchar_t *retval = malloc (length + 1 * sizeof (wchar_t)); |
26475
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
181 if (retval) |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
182 { |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
183 memcpy (retval, wchar, length); |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
184 free ((void *) wchar); |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
185 retval[length / sizeof (wchar_t)] = 0; // 0 terminate string |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
186 } |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
187 |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
188 else |
9baba1815f1c
uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents:
26376
diff
changeset
|
189 free ((void *) wchar); |
25512
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
190 |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
191 return retval; |
7335d44f34b4
Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents:
25054
diff
changeset
|
192 } |