Mercurial > octave
comparison scripts/strings/unicode2native.m @ 32072:f7206b6577c2 stable
unicode2native: Fix conversion to UTF-16 (bug #64139).
* liboctave/wrappers/uniconv-wrappers.c (octave_u8_conv_to_encoding_intern):
Avoid appending a zero-byte when converting to UTF-* to avoid having to strip
a varying number of bytes after the conversion.
* scripts/strings/unicode2native.m: Add test for conversion to UTF-16.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Wed, 03 May 2023 20:43:36 +0200 |
parents | 470134b3fc28 |
children | fab3e312a0b4 |
comparison
equal
deleted
inserted
replaced
32070:bc46d7c2768f | 32072:f7206b6577c2 |
---|---|
80 %!assert <*60480> (unicode2native (''), uint8 ([])) | 80 %!assert <*60480> (unicode2native (''), uint8 ([])) |
81 | 81 |
82 # short character arrays with invalid UTF-8 | 82 # short character arrays with invalid UTF-8 |
83 %!testif HAVE_ICONV <*63930> | 83 %!testif HAVE_ICONV <*63930> |
84 %! assert (unicode2native (char (230), 'windows-1252'), uint8 (63)); | 84 %! assert (unicode2native (char (230), 'windows-1252'), uint8 (63)); |
| | 85 %!testif HAVE_ICONV <*63930> |
85 %! assert (unicode2native (char (249), 'windows-1252'), uint8 (63)); | 86 %! assert (unicode2native (char (249), 'windows-1252'), uint8 (63)); |
| | 87 %!testif HAVE_ICONV <*63930> |
86 %! assert (unicode2native (char (230:231), 'windows-1252'), uint8 ([63, 63])); | 88 %! assert (unicode2native (char (230:231), 'windows-1252'), uint8 ([63, 63])); |
| | 89 %!testif HAVE_ICONV <*63930> |
87 %! assert (unicode2native (char (230:234), 'windows-1252'), | 90 %! assert (unicode2native (char (230:234), 'windows-1252'), |
88 %! uint8 ([63, 63, 63, 63, 63])); | 91 %! uint8 ([63, 63, 63, 63, 63])); |
| | 92 %!testif HAVE_ICONV <*63930> |
89 %! assert (unicode2native (char ([230, 10]), 'windows-1252'), | 93 %! assert (unicode2native (char ([230, 10]), 'windows-1252'), |
90 %! uint8 ([63, 10])); | 94 %! uint8 ([63, 10])); |
| | 95 |
| | 96 # target encoding with surrogates larger than a byte |
| | 97 %!testif HAVE_ICONV <*64139> |
| | 98 %! assert (typecast (unicode2native ('abcde', |
| | 99 %! ['utf-16', nthargout(3, 'computer'), 'e']), |
| | 100 %! 'uint16'), |
| | 101 %! uint16 (97:101)); |
91 | 102 |
92 %!error <Invalid call> unicode2native () | 103 %!error <Invalid call> unicode2native () |
93 %!error <called with too many inputs> unicode2native ('a', 'ISO-8859-1', 'test') | 104 %!error <called with too many inputs> unicode2native ('a', 'ISO-8859-1', 'test') |
94 %!error <UTF8_STR must be a character vector> unicode2native (['ab'; 'cd']) | 105 %!error <UTF8_STR must be a character vector> unicode2native (['ab'; 'cd']) |
95 %!error <UTF8_STR must be a character vector> unicode2native ({1 2 3 4}) | 106 %!error <UTF8_STR must be a character vector> unicode2native ({1 2 3 4}) |