# HG changeset patch # User Markus Mützel # Date 1526499387 -7200 # Node ID 39cf8145405fecfa0d7789c8c1763b2a48632a6e # Parent 922a93fc73ecd9970fdb8628db478a51bca8ceba Make "tolower" and "toupper" Unicode aware (bug #53873). * ov-ch-mat.cc (map): Use UTF-8 aware functions for "tolower" and "toupper". * mappers.cc: Add tests for "tolower" and "toupper". * unicase-wrappers.[c/h]: Add wrappers for "u8_tolower" and "u8_toupper". * module.mk: Add new files. * bootstrap.conf: Add modules "unicase/u8-tolower" and "unicase/u8-toupper". diff -r 922a93fc73ec -r 39cf8145405f bootstrap.conf --- a/bootstrap.conf Sun May 27 19:20:47 2018 +0200 +++ b/bootstrap.conf Wed May 16 21:36:27 2018 +0200 @@ -91,6 +91,8 @@ tempname tmpfile uname + unicase/u8-tolower + unicase/u8-toupper uniconv/u8-conv-from-enc uniconv/u8-conv-to-enc unistd diff -r 922a93fc73ec -r 39cf8145405f libinterp/corefcn/mappers.cc --- a/libinterp/corefcn/mappers.cc Sun May 27 19:20:47 2018 +0200 +++ b/libinterp/corefcn/mappers.cc Wed May 16 21:36:27 2018 +0200 @@ -2139,6 +2139,8 @@ %!assert (tolower ({"ABC", "DEF", {"GHI", {"JKL"}}}), {"abc", "def", {"ghi", {"jkl"}}}) %!assert (tolower (["ABC"; "DEF"]), ["abc"; "def"]) %!assert (tolower ({["ABC"; "DEF"]}), {["abc";"def"]}) +%!assert (tolower (["ABCÄÖÜSS"; "abcäöüß"]), ["abcäöüss"; "abcäöüß"]) +%!assert (tolower (repmat ("ÄÖÜ", 2, 1, 3)), repmat ("äöü", 2, 1, 3)) %!assert (tolower (68), 68) %!assert (tolower ({[68, 68; 68, 68]}), {[68, 68; 68, 68]}) %!assert (tolower (68i), 68i) @@ -2203,6 +2205,8 @@ %!assert (toupper ({"abc", "def", {"ghi", {"jkl"}}}), {"ABC", "DEF", {"GHI", {"JKL"}}}) %!assert (toupper (["abc"; "def"]), ["ABC"; "DEF"]) %!assert (toupper ({["abc"; "def"]}), {["ABC";"DEF"]}) +%!assert (toupper (["ABCÄÖÜSS"; "abcäöüß"]), ["ABCÄÖÜSS"; "ABCÄÖÜSS"]) +%!assert (toupper (repmat ("äöü", 2, 1, 3)), repmat ("ÄÖÜ", 2, 1, 3)) %!assert (toupper (100), 100) %!assert (toupper ({[100, 100; 100, 100]}), {[100, 100; 100, 100]}) %!assert (toupper (100i), 100i) diff -r 
922a93fc73ec -r 39cf8145405f libinterp/octave-value/ov-ch-mat.cc --- a/libinterp/octave-value/ov-ch-mat.cc Sun May 27 19:20:47 2018 +0200 +++ b/libinterp/octave-value/ov-ch-mat.cc Wed May 16 21:36:27 2018 +0200 @@ -41,6 +41,7 @@ #include "lo-ieee.h" #include "mx-base.h" +#include "unicase-wrappers.h" #include "mxarray.h" #include "ov-base.h" @@ -270,8 +271,45 @@ STRING_MAPPER (xisspace, std::isspace, bool); STRING_MAPPER (xisupper, std::isupper, bool); STRING_MAPPER (xisxdigit, std::isxdigit, bool); - STRING_MAPPER (xtolower, std::tolower, char); - STRING_MAPPER (xtoupper, std::toupper, char); + +// Case-convert the whole char array with the UTF-8 aware function U8_FCN. +// For arrays with more than one dimension the first two dimensions are +// swapped before the conversion (presumably so that the bytes of each row +// are contiguous) and the result is permuted back with permute (p, true). +// If the result does not have the same number of bytes as the input, a +// warning is issued and the element-wise STD_FCN mapping is returned. +// NOTE(review): the pointer returned by U8_FCN is discarded; if it can be +// a newly allocated buffer, that buffer is leaked -- TODO confirm and free. +#define STRING_U8_FCN(UMAP,U8_FCN,STD_FCN) \ + case umap_ ## UMAP: \ + { \ + charNDArray in_m = matrix; \ + Array<octave_idx_type> p (dim_vector (matrix.ndims (), 1)); \ + if (matrix.ndims () > 1) \ + { \ + for (octave_idx_type i=0; i < matrix.ndims (); i++) \ + p(i) = i; \ + p(0) = 1; \ + p(1) = 0; \ + in_m = matrix.permute (p); \ + } \ + size_t output_length = in_m.numel (); \ + charNDArray ch_array = charNDArray (in_m.dims ()); \ + const uint8_t *in = reinterpret_cast<const uint8_t *> (in_m.data ()); \ + uint8_t *buf = reinterpret_cast<uint8_t *> (ch_array.fortran_vec ()); \ + U8_FCN (in, matrix.numel (), nullptr, buf, &output_length); \ + if (output_length != static_cast<size_t> (matrix.numel ())) \ + { \ + warning_with_id ("octave:multi_byte_char_length", \ + #UMAP ": Possible multi-byte error."); \ + return octave_value (matrix.map<char, int (&) (int)> (STD_FCN)); \ + } \ + return octave_value ((matrix.ndims () > 1) ? ch_array.permute (p, true)\ + : ch_array); \ + } + + STRING_U8_FCN (xtolower, octave_u8_tolower_wrapper, std::tolower); + STRING_U8_FCN (xtoupper, octave_u8_toupper_wrapper, std::toupper); // For Matlab compatibility, these should work on ASCII values // without error or warning. 
diff -r 922a93fc73ec -r 39cf8145405f liboctave/wrappers/module.mk --- a/liboctave/wrappers/module.mk Sun May 27 19:20:47 2018 +0200 +++ b/liboctave/wrappers/module.mk Wed May 16 21:36:27 2018 +0200 @@ -29,6 +29,7 @@ %reldir%/time-wrappers.h \ %reldir%/tmpfile-wrapper.h \ %reldir%/uname-wrapper.h \ + %reldir%/unicase-wrappers.h \ %reldir%/uniconv-wrappers.h \ %reldir%/unistd-wrappers.h \ %reldir%/unistr-wrappers.h \ @@ -68,6 +69,7 @@ %reldir%/time-wrappers.c \ %reldir%/tmpfile-wrapper.c \ %reldir%/uname-wrapper.c \ + %reldir%/unicase-wrappers.c \ %reldir%/uniconv-wrappers.c \ %reldir%/unistd-wrappers.c \ %reldir%/unistr-wrappers.c \ diff -r 922a93fc73ec -r 39cf8145405f liboctave/wrappers/unicase-wrappers.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/wrappers/unicase-wrappers.c Wed May 16 21:36:27 2018 +0200 @@ -0,0 +1,45 @@ +/* + +Copyright (C) 2018 Markus Mützel + +This file is part of Octave. + +Octave is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Octave is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<https://www.gnu.org/licenses/>. 
+ +*/ + +#if defined (HAVE_CONFIG_H) +# include "config.h" +#endif + +#include "unicase.h" + +#include "unicase-wrappers.h" + +uint8_t * +octave_u8_tolower_wrapper (const uint8_t *s, size_t n, + const char *iso639_language, + uint8_t *resultbuf, size_t *lengthp) +{ + return u8_tolower (s, n, iso639_language, NULL, resultbuf, lengthp); +} + +uint8_t * +octave_u8_toupper_wrapper (const uint8_t *s, size_t n, + const char *iso639_language, + uint8_t *resultbuf, size_t *lengthp) +{ + return u8_toupper (s, n, iso639_language, NULL, resultbuf, lengthp); +} diff -r 922a93fc73ec -r 39cf8145405f liboctave/wrappers/unicase-wrappers.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/wrappers/unicase-wrappers.h Wed May 16 21:36:27 2018 +0200 @@ -0,0 +1,52 @@ +/* + +Copyright (C) 2018 Markus Mützel + +This file is part of Octave. + +Octave is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Octave is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<https://www.gnu.org/licenses/>. + +*/ + +#if ! defined (octave_unicase_wrappers_h) +#define octave_unicase_wrappers_h 1 + +/* size_t and uint8_t are used in the declarations below. */ +#include <stddef.h> +#include <stdint.h> + +#if defined __cplusplus +extern "C" { +#endif + +/* Wrappers for the u8_tolower and u8_toupper functions from unicase.h. + All arguments are forwarded unchanged; NULL is passed for the extra + fourth argument of the wrapped functions (the normalization form in + the libunistring API). */ +extern uint8_t * +octave_u8_tolower_wrapper (const uint8_t *s, size_t n, + const char *iso639_language, + uint8_t *resultbuf, size_t *lengthp); + +extern uint8_t * +octave_u8_toupper_wrapper (const uint8_t *s, size_t n, + const char *iso639_language, + uint8_t *resultbuf, size_t *lengthp); + +#if defined __cplusplus +} +#endif + +#endif