Mercurial > octave
changeset 25413:39cf8145405f
Make "tolower" and "toupper" Unicode aware (bug #53873).
* ov-ch-mat.cc (map): Use UTF-8 aware functions for "tolower" and "toupper".
* mappers.cc: Add tests for "tolower" and "toupper".
* unicase-wrappers.[c/h]: Add wrappers for "u8_tolower" and "u8_toupper".
* module.mk: Add new files.
* bootstrap.conf: Add modules "unicase/u8-tolower" and "unicase/u8-toupper".
field | value |
---|---|
author | Markus Mützel <markus.muetzel@gmx.de> |
date | Wed, 16 May 2018 21:36:27 +0200 |
parents | 922a93fc73ec |
children | 8fae933e7228 |
files | bootstrap.conf libinterp/corefcn/mappers.cc libinterp/octave-value/ov-ch-mat.cc liboctave/wrappers/module.mk liboctave/wrappers/unicase-wrappers.c liboctave/wrappers/unicase-wrappers.h |
diffstat | 6 files changed, 129 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/bootstrap.conf Sun May 27 19:20:47 2018 +0200
+++ b/bootstrap.conf Wed May 16 21:36:27 2018 +0200
@@ -91,6 +91,8 @@
   tempname
   tmpfile
   uname
+  unicase/u8-tolower
+  unicase/u8-toupper
   uniconv/u8-conv-from-enc
   uniconv/u8-conv-to-enc
   unistd
```
```diff
--- a/libinterp/corefcn/mappers.cc Sun May 27 19:20:47 2018 +0200
+++ b/libinterp/corefcn/mappers.cc Wed May 16 21:36:27 2018 +0200
@@ -2139,6 +2139,8 @@
 %!assert (tolower ({"ABC", "DEF", {"GHI", {"JKL"}}}), {"abc", "def", {"ghi", {"jkl"}}})
 %!assert (tolower (["ABC"; "DEF"]), ["abc"; "def"])
 %!assert (tolower ({["ABC"; "DEF"]}), {["abc";"def"]})
+%!assert (tolower (["ABCÄÖÜSS"; "abcäöüß"]), ["abcäöüss"; "abcäöüß"])
+%!assert (tolower (repmat ("ÄÖÜ", 2, 1, 3)), repmat ("äöü", 2, 1, 3))
 %!assert (tolower (68), 68)
 %!assert (tolower ({[68, 68; 68, 68]}), {[68, 68; 68, 68]})
 %!assert (tolower (68i), 68i)
@@ -2203,6 +2205,8 @@
 %!assert (toupper ({"abc", "def", {"ghi", {"jkl"}}}), {"ABC", "DEF", {"GHI", {"JKL"}}})
 %!assert (toupper (["abc"; "def"]), ["ABC"; "DEF"])
 %!assert (toupper ({["abc"; "def"]}), {["ABC";"DEF"]})
+%!assert (toupper (["ABCÄÖÜSS"; "abcäöüß"]), ["ABCÄÖÜSS"; "ABCÄÖÜSS"])
+%!assert (toupper (repmat ("äöü", 2, 1, 3)), repmat ("ÄÖÜ", 2, 1, 3))
 %!assert (toupper (100), 100)
 %!assert (toupper ({[100, 100; 100, 100]}), {[100, 100; 100, 100]})
 %!assert (toupper (100i), 100i)
```
```diff
--- a/libinterp/octave-value/ov-ch-mat.cc Sun May 27 19:20:47 2018 +0200
+++ b/libinterp/octave-value/ov-ch-mat.cc Wed May 16 21:36:27 2018 +0200
@@ -41,6 +41,7 @@
 
 #include "lo-ieee.h"
 #include "mx-base.h"
+#include "unicase-wrappers.h"
 
 #include "mxarray.h"
 #include "ov-base.h"
@@ -270,8 +271,37 @@
       STRING_MAPPER (xisspace, std::isspace, bool);
       STRING_MAPPER (xisupper, std::isupper, bool);
       STRING_MAPPER (xisxdigit, std::isxdigit, bool);
-      STRING_MAPPER (xtolower, std::tolower, char);
-      STRING_MAPPER (xtoupper, std::toupper, char);
+
+#define STRING_U8_FCN(UMAP,U8_FCN,STD_FCN) \
+    case umap_ ## UMAP: \
+      { \
+        charNDArray in_m = matrix; \
+        Array<octave_idx_type> p (dim_vector (matrix.ndims (), 1)); \
+        if (matrix.ndims () > 1) \
+          { \
+            for (octave_idx_type i=0; i < matrix.ndims (); i++) \
+              p(i) = i; \
+            p(0) = 1; \
+            p(1) = 0; \
+            in_m = matrix.permute (p); \
+          } \
+        size_t output_length = in_m.numel (); \
+        charNDArray ch_array = charNDArray (in_m.dims ()); \
+        const uint8_t *in = reinterpret_cast<const uint8_t *> (in_m.data ()); \
+        uint8_t *buf = reinterpret_cast<uint8_t *> (ch_array.fortran_vec ()); \
+        U8_FCN (in, matrix.numel (), nullptr, buf, &output_length); \
+        if (output_length != static_cast<size_t> (matrix.numel ())) \
+          { \
+            warning_with_id ("octave:multi_byte_char_length", \
+                             "UMAP: Possible multi-byte error."); \
+            return octave_value (matrix.map<char, int (&) (int)> (STD_FCN)); \
+          } \
+        return octave_value ((matrix.ndims () > 1) ? ch_array.permute (p, true)\
+                                                   : ch_array); \
+      }
+
+      STRING_U8_FCN (xtolower, octave_u8_tolower_wrapper, std::tolower);
+      STRING_U8_FCN (xtoupper, octave_u8_toupper_wrapper, std::toupper);
 
       // For Matlab compatibility, these should work on ASCII values
       // without error or warning.
```
```diff
--- a/liboctave/wrappers/module.mk Sun May 27 19:20:47 2018 +0200
+++ b/liboctave/wrappers/module.mk Wed May 16 21:36:27 2018 +0200
@@ -29,6 +29,7 @@
   %reldir%/time-wrappers.h \
   %reldir%/tmpfile-wrapper.h \
   %reldir%/uname-wrapper.h \
+  %reldir%/unicase-wrappers.h \
   %reldir%/uniconv-wrappers.h \
   %reldir%/unistd-wrappers.h \
   %reldir%/unistr-wrappers.h \
@@ -68,6 +69,7 @@
   %reldir%/time-wrappers.c \
   %reldir%/tmpfile-wrapper.c \
   %reldir%/uname-wrapper.c \
+  %reldir%/unicase-wrappers.c \
   %reldir%/uniconv-wrappers.c \
   %reldir%/unistd-wrappers.c \
   %reldir%/unistr-wrappers.c \
```
```diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/wrappers/unicase-wrappers.c Wed May 16 21:36:27 2018 +0200
@@ -0,0 +1,45 @@
+/*
+
+Copyright (C) 2018 Markus Mützel
+
+This file is part of Octave.
+
+Octave is free software: you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+Octave is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<https://www.gnu.org/licenses/>.
+
+*/
+
+#if defined (HAVE_CONFIG_H)
+#  include "config.h"
+#endif
+
+#include "unicase.h"
+
+#include "unicase-wrappers.h"
+
+uint8_t *
+octave_u8_tolower_wrapper (const uint8_t *s, size_t n,
+                           const char *iso639_language,
+                           uint8_t *resultbuf, size_t *lengthp)
+{
+  return u8_tolower (s, n, iso639_language, NULL, resultbuf, lengthp);
+}
+
+uint8_t *
+octave_u8_toupper_wrapper (const uint8_t *s, size_t n,
+                           const char *iso639_language,
+                           uint8_t *resultbuf, size_t *lengthp)
+{
+  return u8_toupper (s, n, iso639_language, NULL, resultbuf, lengthp);
+}
```
```diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/wrappers/unicase-wrappers.h Wed May 16 21:36:27 2018 +0200
@@ -0,0 +1,44 @@
+/*
+
+Copyright (C) 2018 Markus Mützel
+
+This file is part of Octave.
+
+Octave is free software: you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+Octave is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<https://www.gnu.org/licenses/>.
+
+*/
+
+#if ! defined (octave_unicase_wrappers_h)
+#define octave_unicase_wrappers_h 1
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+extern uint8_t *
+octave_u8_tolower_wrapper (const uint8_t *s, size_t n,
+                           const char *iso639_language,
+                           uint8_t *resultbuf, size_t *lengthp);
+
+extern uint8_t *
+octave_u8_toupper_wrapper (const uint8_t *s, size_t n,
+                           const char *iso639_language,
+                           uint8_t *resultbuf, size_t *lengthp);
+
+#if defined __cplusplus
+}
+#endif
+
+#endif
```