Mercurial > octave
diff libinterp/octave-value/ov-ch-mat.cc @ 25415:d4bc8590b5cf
Make "is*" string functions Unicode aware.
* ov-ch-mat.cc (map): Use UTF-8 aware functions for "is*" string functions.
* mappers.cc: Add tests for "is*" string functions.
* unistr-wrappers.[c/h]: Add wrapper for "u8_strmbtouc".
* unictype-wrappers.[c/h]: Add wrappers for UTF-8 aware C-like "is*" functions.
* module.mk: Add new files.
* bootstrap.conf: Add new modules.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Sun, 27 May 2018 22:13:24 +0200 |
parents | 39cf8145405f |
children | cb1606f78f6b |
line wrap: on
line diff
--- a/libinterp/octave-value/ov-ch-mat.cc Tue May 29 09:08:25 2018 +0200 +++ b/libinterp/octave-value/ov-ch-mat.cc Sun May 27 22:13:24 2018 +0200 @@ -42,6 +42,8 @@ #include "lo-ieee.h" #include "mx-base.h" #include "unicase-wrappers.h" +#include "unictype-wrappers.h" +#include "unistr-wrappers.h" #include "mxarray.h" #include "ov-base.h" @@ -259,18 +261,49 @@ case umap_ ## UMAP: \ return octave_value (matrix.map<TYPE, int (&) (int)> (FCN)) - STRING_MAPPER (xisalnum, std::isalnum, bool); - STRING_MAPPER (xisalpha, std::isalpha, bool); STRING_MAPPER (xisascii, xisascii, bool); - STRING_MAPPER (xiscntrl, std::iscntrl, bool); - STRING_MAPPER (xisdigit, std::isdigit, bool); - STRING_MAPPER (xisgraph, std::isgraph, bool); - STRING_MAPPER (xislower, std::islower, bool); - STRING_MAPPER (xisprint, std::isprint, bool); - STRING_MAPPER (xispunct, std::ispunct, bool); - STRING_MAPPER (xisspace, std::isspace, bool); - STRING_MAPPER (xisupper, std::isupper, bool); - STRING_MAPPER (xisxdigit, std::isxdigit, bool); + +#define STRING_U8_MAPPER(UMAP,FCN) \ + case umap_ ## UMAP: \ + { \ + charNDArray in_m = matrix; \ + Array<octave_idx_type> p (dim_vector (matrix.ndims (), 1)); \ + if (matrix.ndims () > 1) \ + { \ + for (octave_idx_type i=0; i < matrix.ndims (); i++) \ + p(i) = i; \ + p(0) = 1; \ + p(1) = 0; \ + in_m = matrix.permute (p); \ + } \ + boolNDArray b_array = boolNDArray (in_m.dims ()); \ + const uint8_t *in = reinterpret_cast<const uint8_t *> (in_m.data ()); \ + uint32_t uc; \ + for (octave_idx_type i = 0; i < in_m.numel (); ) \ + { \ + int mblen = octave_u8_strmbtouc_wrapper (&uc, in + i); \ + if (mblen < 1) \ + mblen = 1; \ + bool is_upper = FCN (uc); \ + for (int j = 0; j < mblen; j++) \ + b_array(i+j) = is_upper; \ + i += mblen; \ + } \ + return octave_value ((matrix.ndims () > 1) ? b_array.permute (p, true) \ + : b_array); \ + } + + STRING_U8_MAPPER (xisalnum, octave_uc_is_alnum_wrapper); + STRING_U8_MAPPER (xisalpha, octave_uc_is_alpha_wrapper); + STRING_U8_MAPPER (xiscntrl, octave_uc_is_cntrl_wrapper); + STRING_U8_MAPPER (xisdigit, octave_uc_is_digit_wrapper); + STRING_U8_MAPPER (xisgraph, octave_uc_is_graph_wrapper); + STRING_U8_MAPPER (xislower, octave_uc_is_lower_wrapper); + STRING_U8_MAPPER (xisprint, octave_uc_is_print_wrapper); + STRING_U8_MAPPER (xispunct, octave_uc_is_punct_wrapper); + STRING_U8_MAPPER (xisspace, octave_uc_is_space_wrapper); + STRING_U8_MAPPER (xisupper, octave_uc_is_upper_wrapper); + STRING_U8_MAPPER (xisxdigit, octave_uc_is_xdigit_wrapper); #define STRING_U8_FCN(UMAP,U8_FCN,STD_FCN) \ case umap_ ## UMAP: \ @@ -278,13 +311,13 @@ charNDArray in_m = matrix; \ Array<octave_idx_type> p (dim_vector (matrix.ndims (), 1)); \ if (matrix.ndims () > 1) \ - { \ - for (octave_idx_type i=0; i < matrix.ndims (); i++) \ - p(i) = i; \ - p(0) = 1; \ - p(1) = 0; \ - in_m = matrix.permute (p); \ - } \ + { \ + for (octave_idx_type i=0; i < matrix.ndims (); i++) \ + p(i) = i; \ + p(0) = 1; \ + p(1) = 0; \ + in_m = matrix.permute (p); \ + } \ size_t output_length = in_m.numel (); \ charNDArray ch_array = charNDArray (in_m.dims ()); \ const uint8_t *in = reinterpret_cast<const uint8_t *> (in_m.data ()); \