Mercurial > octave
diff libinterp/corefcn/strfns.cc @ 23122:e310b5b6da6f
Add functions native2unicode and unicode2native (bug #49842).
* native2unicode.m, unicode2native.m: Add new functions that wrap
__native2unicode__ and __unicode2native__ with input conversions and
checks.
* strfns.cc (F__native2unicode__, F__unicode2native__): New functions.
* bootstrap.conf (gnulib_modules): Add libunistring to the list.
* __unimplemented__.m: Delete names from list of missing functions.
* scripts/strings/module.mk: Update.
* strings.txi: Add doc strings to manual.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Sun, 22 Jan 2017 13:58:57 +0100 |
parents | ef4d915df748 |
children | c6ca5fe1505c |
line wrap: on
line diff
--- a/libinterp/corefcn/strfns.cc	Tue Jan 31 06:22:13 2017 -0500
+++ b/libinterp/corefcn/strfns.cc	Sun Jan 22 13:58:57 2017 +0100
@@ -29,6 +29,10 @@
 #include <queue>
 #include <sstream>
 
+#ifdef HAVE_LIBUNISTRING
+#  include <uniconv.h>
+#endif
+
 #include "dMatrix.h"
 
 #include "Cell.h"
@@ -730,6 +734,114 @@
 %!assert (strncmpi ("abc123", "ABC456", 3), true)
 */
 
+DEFUN (__native2unicode__, args, ,
+       doc: /* -*- texinfo -*-
+@deftypefn {} {@var{utf8_str} =} __native2unicode__ (@var{native_bytes}, @var{codepage})
+Convert byte stream @var{native_bytes} to UTF-8 using @var{codepage}.
+
+If @var{native_bytes} is already a string, it is returned unchanged.  If
+@var{codepage} is omitted or numeric, the character set of the current
+locale is used.
+
+@seealso{native2unicode, __unicode2native__}
+@end deftypefn */)
+{
+#ifdef HAVE_LIBUNISTRING
+  int nargin = args.length ();
+
+  if (nargin < 1 || nargin > 2)
+    print_usage ();
+
+  // Character input needs no conversion; hand it back unchanged.
+  if (args(0).is_string ())
+    return ovl (args(0));
+
+  // Select the source encoding.  Keep the locale's character set when no
+  // codepage argument was passed (args(1) does not exist then), when it is
+  // numeric, or when it is an empty string.
+  const char *codepage = locale_charset ();
+  string_vector tmp;  // keeps the storage behind codepage alive
+  if (nargin > 1 && ! args(1).is_numeric_type ())
+    {
+      tmp = args(1).string_vector_value ();
+      if (tmp.numel () > 0)
+        codepage = tmp(0).c_str ();
+    }
+
+  // Convert the byte stream from the selected encoding to UTF-8.
+  charNDArray native_bytes = args(0).char_array_value ();
+  size_t length;
+  char *utf8_str = reinterpret_cast<char *>
+                    (u8_conv_from_encoding (codepage, iconveh_question_mark,
+                                            native_bytes.fortran_vec (),
+                                            native_bytes.numel (), NULL,
+                                            NULL, &length));
+  if (utf8_str == NULL)
+    error ("native2unicode: Error '%s' converting from codepage '%s' to UTF-8.",
+           std::strerror (errno), codepage);
+
+  // Copy the converted text, then release the buffer returned by the
+  // conversion routine.
+  std::string ret_val = std::string (utf8_str, length);
+  free (utf8_str);
+  return ovl (charNDArray (ret_val));
+#else
+  octave_unused_parameter (args);
+
+  err_disabled_feature ("__native2unicode__", "libunistring");
+#endif
+}
+
+DEFUN (__unicode2native__, args, ,
+       doc: /* -*- texinfo -*-
+@deftypefn {} {@var{native_bytes} =} __unicode2native__ (@var{utf8_str}, @var{codepage})
+Convert UTF-8 string @var{utf8_str} to byte stream @var{native_bytes} using
+@var{codepage}.
+
+If @var{codepage} is numeric, the character set of the current locale is
+used.
+
+@seealso{unicode2native, __native2unicode__}
+@end deftypefn */)
+{
+#ifdef HAVE_LIBUNISTRING
+  int nargin = args.length ();
+
+  if (nargin != 2)
+    print_usage ();
+
+  // Select the target encoding.  Keep the locale's character set when the
+  // codepage argument is numeric or an empty string.
+  const char *codepage = locale_charset ();
+  string_vector tmp;  // keeps the storage behind codepage alive
+  if (! args(1).is_numeric_type ())
+    {
+      tmp = args(1).string_vector_value ();
+      if (tmp.numel () > 0)
+        codepage = tmp(0).c_str ();
+    }
+
+  // Convert the UTF-8 string to a byte stream in the selected encoding.
+  charNDArray utf8_str = args(0).char_array_value ();
+  size_t length;
+  char *native_bytes = u8_conv_to_encoding (codepage, iconveh_question_mark,
+                    reinterpret_cast<uint8_t*> (utf8_str.fortran_vec ()),
+                    utf8_str.numel (), NULL, NULL, &length);
+  if (native_bytes == NULL)
+    error ("unicode2native: Error '%s' converting from UTF-8 to codepage '%s'.",
+           std::strerror (errno), codepage);
+
+  // Copy the converted bytes, then release the buffer returned by the
+  // conversion routine.
+  std::string ret_val = std::string (native_bytes, length);
+  free (native_bytes);
+  return ovl (NDArray (ret_val));
+#else
+  octave_unused_parameter (args);
+
+  err_disabled_feature ("__unicode2native__", "libunistring");
+#endif
+}
 DEFUN (list_in_columns, args, ,
        doc: /* -*- texinfo -*-
 @deftypefn {} {} list_in_columns (@var{arg}, @var{width}, @var{prefix})