annotate liboctave/wrappers/uniconv-wrappers.c @ 32089:212145b8e5f0

maint: Merge stable to default.
author Markus Mützel <markus.muetzel@gmx.de>
date Fri, 12 May 2023 08:03:14 +0200
parents 24752aa8be11 e2911d0176dc
children 2e484f9f1f18
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
27923
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
1 ////////////////////////////////////////////////////////////////////////
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
2 //
31706
597f3ee61a48 update Octave Project Developers copyright for the new year
John W. Eaton <jwe@octave.org>
parents: 30822
diff changeset
3 // Copyright (C) 2017-2023 The Octave Project Developers
27923
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
4 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
5 // See the file COPYRIGHT.md in the top-level directory of this
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
6 // distribution or <https://octave.org/copyright/>.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
7 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
8 // This file is part of Octave.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
9 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
10 // Octave is free software: you can redistribute it and/or modify it
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
11 // under the terms of the GNU General Public License as published by
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
12 // the Free Software Foundation, either version 3 of the License, or
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
13 // (at your option) any later version.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
14 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
15 // Octave is distributed in the hope that it will be useful, but
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
18 // GNU General Public License for more details.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
19 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
20 // You should have received a copy of the GNU General Public License
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
21 // along with Octave; see the file COPYING. If not, see
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
22 // <https://www.gnu.org/licenses/>.
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
23 //
bd51beb6205e update formatting of copyright notices
John W. Eaton <jwe@octave.org>
parents: 27919
diff changeset
24 ////////////////////////////////////////////////////////////////////////
23123
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
25
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
26 // The conversion functions are provided by gnulib. We don't include
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
27 // gnulib headers directly in Octave's C++ source files to avoid
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
28 // problems that may be caused by the way that gnulib overrides standard
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
29 // library functions.
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
30
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
31 #if defined (HAVE_CONFIG_H)
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
32 # include "config.h"
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
33 #endif
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
34
25512
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
35 #include <stdlib.h>
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
36 #include <string.h>
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
37 #include <wchar.h>
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
38
23123
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
39 #include "uniconv.h"
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
40
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
41 #include "uniconv-wrappers.h"
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
42
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
43 uint8_t *
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
44 octave_u8_conv_from_encoding (const char *fromcode, const char *src,
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
45 size_t srclen, size_t *lengthp)
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
46 {
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
47 return u8_conv_from_encoding (fromcode, iconveh_question_mark,
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
48 src, srclen, NULL, NULL, lengthp);
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
49 }
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
50
31967
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
51 static char *
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
52 octave_u8_conv_to_encoding_intern (const char *tocode,
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
53 enum iconv_ilseq_handler handler,
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
54 const uint8_t *src, size_t srclen,
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
55 size_t *offsets, size_t *lengthp)
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
56 {
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
57 // FIXME: It looks like the input to u8_conv_to_encoding must be at least
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
58 // four bytes and zero-terminated to work correctly. Zero-pad input.
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
59 // Should this be fixed in gnulib or iconv instead?
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
60 size_t minlen = 4;
32072
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
61 size_t padlen = (srclen > minlen ? srclen : minlen);
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
62
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
63 // Do not zero-terminate when the output encoding is a UTF encoding, i.e.,
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
64 // the surrogates are different than a byte.
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
65 if ((tocode[0] != 'u' && tocode[0] != 'U')
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
66 || (tocode[1] != 't' && tocode[1] != 'T')
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
67 || (tocode[2] != 'f' && tocode[2] != 'F'))
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
68 padlen++;
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
69
32088
e2911d0176dc uniconv-wrappers: Avoid freeing uninitialized pointer (bug #64182).
Markus Mützel <markus.muetzel@gmx.de>
parents: 32072
diff changeset
70 uint8_t *u8_str = NULL;
32072
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
71 const uint8_t *cu8_str;
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
72 if (srclen < padlen)
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
73 {
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
74 u8_str = (uint8_t *) malloc (padlen);
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
75 memcpy (u8_str, src, srclen);
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
76 for (size_t i_pad = 0; i_pad < padlen-srclen; i_pad++)
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
77 u8_str[srclen+i_pad] = 0;
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
78 cu8_str = u8_str;
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
79 }
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
80 else
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
81 cu8_str = src;
31967
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
82
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
83 // Convert from UTF-8 to output encoding
32072
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
84 char *ret = u8_conv_to_encoding (tocode, handler, cu8_str, padlen,
31967
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
85 offsets, NULL, lengthp);
32072
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
86
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
87 if (srclen > padlen)
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
88 free ((void *) u8_str);
31967
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
89
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
90 // FIXME: This assumes that "\0" is converted to a single byte. This might
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
91 // not be true for some exotic output encodings (like UTF-7?).
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
92 *lengthp = (*lengthp <= (padlen-srclen) ? 0 : *lengthp - (padlen-srclen));
32072
f7206b6577c2 unicode2native: Fix conversion to UTF-16 (bug #64139).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
93
31967
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
94 return ret;
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
95 }
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
96
25512
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
97 char *
23123
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
98 octave_u8_conv_to_encoding (const char *tocode, const uint8_t *src,
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
99 size_t srclen, size_t *lengthp)
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
100 {
31967
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
101 return octave_u8_conv_to_encoding_intern (tocode, iconveh_question_mark,
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
102 src, srclen, NULL, lengthp);
23123
c6ca5fe1505c use wrappers for uniconv functions; style fixes for unicode conv fcns
John W. Eaton <jwe@octave.org>
parents:
diff changeset
103 }
25512
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
104
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
105 char *
28534
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
106 octave_u8_conv_to_encoding_strict (const char *tocode, const uint8_t *src,
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
107 size_t srclen, size_t *lengthp)
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
108 {
31967
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
109 return octave_u8_conv_to_encoding_intern (tocode, iconveh_error,
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
110 src, srclen, NULL, lengthp);
28534
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
111 }
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
112
32023
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
113 uint16_t *
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
114 octave_u16_conv_from_encoding (const char *fromcode, const char *src,
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
115 size_t srclen, size_t *lengthp)
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
116 {
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
117 return u16_conv_from_encoding (fromcode, iconveh_question_mark,
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
118 src, srclen, NULL, NULL, lengthp);
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
119 }
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
120
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
121 uint16_t *
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
122 octave_u16_conv_from_encoding_strict (const char *fromcode, const char *src,
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
123 size_t srclen, size_t *lengthp)
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
124 {
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
125 return u16_conv_from_encoding (fromcode, iconveh_error,
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
126 src, srclen, NULL, NULL, lengthp);
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
127 }
bce1850f8104 gui: Use iconv/gnulib to load editor content from files in any encoding.
Markus Mützel <markus.muetzel@gmx.de>
parents: 32021
diff changeset
128
28534
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
129 char *
32021
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
130 octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src,
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
131 size_t srclen, size_t *lengthp)
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
132 {
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
133 return u16_conv_to_encoding (tocode, iconveh_question_mark,
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
134 src, srclen, NULL, NULL, lengthp);
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
135 }
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
136
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
137 char *
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
138 octave_u16_conv_to_encoding_strict (const char *tocode, const uint16_t *src,
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
139 size_t srclen, size_t *lengthp)
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
140 {
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
141 return u16_conv_to_encoding (tocode, iconveh_error,
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
142 src, srclen, NULL, NULL, lengthp);
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
143 }
da2954782945 gui: Use iconv/gnulib to save editor content in non-UTF-8.
Markus Mützel <markus.muetzel@gmx.de>
parents: 31967
diff changeset
144
28534
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
145 char *
26331
5f10217b562d Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25512
diff changeset
146 octave_u32_conv_to_encoding_strict (const char *tocode, const uint32_t *src,
28534
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
147 size_t srclen, size_t *lengthp)
26331
5f10217b562d Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25512
diff changeset
148 {
5f10217b562d Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25512
diff changeset
149 return u32_conv_to_encoding (tocode, iconveh_error,
28534
548598760b66 get_ASCII_filename: On Windows, optionally try to convert to the locale charset.
Markus Mützel <markus.muetzel@gmx.de>
parents: 27923
diff changeset
150 src, srclen, NULL, NULL, lengthp);
26331
5f10217b562d Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25512
diff changeset
151 }
5f10217b562d Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25512
diff changeset
152
30822
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
153 uint8_t *
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
154 octave_u8_conv_from_encoding_offsets
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
155 (const char *fromcode, const char *src, size_t srclen,
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
156 size_t *offsets, size_t *lengthp)
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
157 {
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
158 return u8_conv_from_encoding (fromcode, iconveh_question_mark,
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
159 src, srclen, offsets, NULL, lengthp);
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
160 }
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
161
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
162 char *
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
163 octave_u8_conv_to_encoding_offsets
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
164 (const char *tocode, const uint8_t *src, size_t srclen,
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
165 size_t *offsets, size_t *lengthp)
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
166 {
31967
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
167 return octave_u8_conv_to_encoding_intern (tocode, iconveh_question_mark,
470134b3fc28 Fix converting short char arrays with invalid UTF-8 (bug #63930).
Markus Mützel <markus.muetzel@gmx.de>
parents: 31706
diff changeset
168 src, srclen, offsets, lengthp);
30822
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
169 }
0826c503f294 Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
Markus Mützel <markus.muetzel@gmx.de>
parents: 30564
diff changeset
170
26331
5f10217b562d Use gnulib to check if encoding is possible (bug #55306).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25512
diff changeset
171 char *
25512
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
172 u8_from_wchar (const wchar_t *wc)
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
173 {
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
174 // Convert wide char array to multibyte UTF-8 char array
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
175 // The memory at the returned pointer must be freed after use.
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
176
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
177 size_t srclen = wcslen (wc) * sizeof (wchar_t);
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
178 const char *src = (const char *) wc;
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
179
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
180 size_t length = 0;
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
181 uint8_t *mbchar = u8_conv_from_encoding ("wchar_t", iconveh_question_mark,
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
182 src, srclen, NULL, NULL, &length);
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
183
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
184 // result might not be 0 terminated
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
185 char *retval = malloc (length + 1);
26475
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
186 if (retval)
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
187 {
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
188 memcpy (retval, mbchar, length);
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
189 free ((void *) mbchar);
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
190 retval[length] = 0; // 0 terminate string
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
191 }
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
192 else
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
193 free ((void *) mbchar);
25512
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
194
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
195 return retval;
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
196 }
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
197
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
198 wchar_t *
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
199 u8_to_wchar (const char *u8)
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
200 {
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
201 // Convert multibyte UTF-8 char array to wide char array
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
202 // The memory at the returned pointer must be freed after use.
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
203
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
204 size_t srclen = strlen (u8);
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
205 const uint8_t *src = (const uint8_t *) u8;
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
206
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
207 size_t length = 0;
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
208
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
209 char *wchar = u8_conv_to_encoding ("wchar_t", iconveh_question_mark,
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
210 src, srclen, NULL, NULL, &length);
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
211 // result might not be 0 terminated
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
212 wchar_t *retval = malloc (length + 1 * sizeof (wchar_t));
26475
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
213 if (retval)
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
214 {
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
215 memcpy (retval, wchar, length);
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
216 free ((void *) wchar);
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
217 retval[length / sizeof (wchar_t)] = 0; // 0 terminate string
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
218 }
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
219
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
220 else
9baba1815f1c uniconv-wrappers.c: Fix static analyzer detected issues (bug #55347).
Rik <rik@octave.org>
parents: 26376
diff changeset
221 free ((void *) wchar);
25512
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
222
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
223 return retval;
7335d44f34b4 Add conversion functions between UTF-8 and wchar_t (bug #49118).
Markus Mützel <markus.muetzel@gmx.de>
parents: 25054
diff changeset
224 }