diff libinterp/corefcn/strfns.cc @ 23122:e310b5b6da6f

Add functions native2unicode and unicode2native (bug #49842). * native2unicode.m, unicode2native.m: Add new functions that wrap __native2unicode__ and __unicode2native__ with input conversions and checks. * strfns.cc (F__native2unicode__, F__unicode2native__): New functions. * bootstrap.conf (gnulib_modules): Add libunistring to the list. * __unimplemented.m: Delete names from list of missing functions. * scripts/strings/module.mk: Update. * strings.txi: Add doc strings to manual.
author Markus Mützel <markus.muetzel@gmx.de>
date Sun, 22 Jan 2017 13:58:57 +0100
parents ef4d915df748
children c6ca5fe1505c
line wrap: on
line diff
--- a/libinterp/corefcn/strfns.cc	Tue Jan 31 06:22:13 2017 -0500
+++ b/libinterp/corefcn/strfns.cc	Sun Jan 22 13:58:57 2017 +0100
@@ -29,6 +29,10 @@
 #include <queue>
 #include <sstream>
 
+#ifdef HAVE_LIBUNISTRING
+#  include <uniconv.h>
+#endif
+
 #include "dMatrix.h"
 
 #include "Cell.h"
@@ -730,6 +734,99 @@
 %!assert (strncmpi ("abc123", "ABC456", 3), true)
 */
 
+DEFUN (__native2unicode__, args, ,
+       doc: /* -*- texinfo -*-
+@deftypefn {} {@var{utf8_str} =} __native2unicode__ (@var{native_bytes}, @var{codepage})
+Convert byte stream @var{native_bytes} to UTF-8 using @var{codepage} (the
+locale charset if omitted, numeric, or empty).  Strings are returned as-is.
+
+@seealso{native2unicode, __unicode2native__}
+@end deftypefn */)
+{
+#ifdef HAVE_LIBUNISTRING
+  int nargin = args.length ();
+
+  if (nargin < 1 || nargin > 2)
+    print_usage ();
+
+  // Character data needs no conversion and is handed back untouched.
+  if (args(0).is_string ())
+    return ovl (args(0));
+
+  // Default to the locale charset; read args(1) only if it was supplied.
+  const char *codepage = locale_charset ();
+  string_vector tmp;  // owns the storage that codepage may point into
+  if (nargin > 1 && ! args(1).is_numeric_type ())
+    tmp = args(1).string_vector_value ();
+  if (tmp.numel () > 0)
+    codepage = tmp(0).c_str ();
+
+  // convert byte stream with local encoding to UTF-8
+  charNDArray native_bytes = args(0).char_array_value ();
+  size_t length;
+  char *utf8_str = reinterpret_cast<char *>
+                   (u8_conv_from_encoding (codepage, iconveh_question_mark,
+                                           native_bytes.fortran_vec (),
+                                           native_bytes.numel (), NULL,
+                                           NULL, &length));
+  if (utf8_str == NULL)
+    error ("native2unicode: Error '%s' converting from codepage '%s' to UTF-8.",
+           std::strerror (errno), codepage);
+
+  // Copy the result before releasing the buffer the converter allocated.
+  std::string ret_val = std::string (utf8_str, length);
+  free (utf8_str);
+  return ovl (charNDArray (ret_val));
+#else
+  octave_unused_parameter (args);
+  err_disabled_feature ("__native2unicode__", "libunistring");
+#endif
+}
+
+DEFUN (__unicode2native__, args, ,
+       doc: /* -*- texinfo -*-
+@deftypefn {} {@var{native_bytes} =} __unicode2native__ (@var{utf8_str}, @var{codepage})
+Convert UTF-8 string @var{utf8_str} to byte stream @var{native_bytes} using
+@var{codepage}.  If @var{codepage} is numeric or empty, the charset of the
+current locale is used.
+
+@seealso{unicode2native, __native2unicode__}
+@end deftypefn */)
+{
+#ifdef HAVE_LIBUNISTRING
+  int nargin = args.length ();
+
+  if (nargin != 2)
+    print_usage ();
+
+  // Default to the locale charset unless a non-empty codepage name is given.
+  const char *codepage = locale_charset ();
+  string_vector tmp;  // owns the storage that codepage may point into
+  if (! args(1).is_numeric_type ())
+    tmp = args(1).string_vector_value ();
+  if (tmp.numel () > 0)
+    codepage = tmp(0).c_str ();
+
+  // convert UTF-8 string vector to byte-stream with local encoding
+  charNDArray utf8_str = args(0).char_array_value ();
+  size_t length;
+  char *native_bytes = u8_conv_to_encoding (codepage, iconveh_question_mark,
+                            reinterpret_cast<uint8_t*> (utf8_str.fortran_vec ()),
+                            utf8_str.numel (), NULL, NULL, &length);
+  if (native_bytes == NULL)
+    error ("unicode2native: Error '%s' converting from UTF-8 to codepage '%s'.",
+           std::strerror (errno), codepage);
+
+  // Copy the result before releasing the buffer the converter allocated.
+  std::string ret_val = std::string (native_bytes, length);
+  free (native_bytes);
+  return ovl (NDArray (ret_val));
+#else
+  octave_unused_parameter (args);
+
+  err_disabled_feature ("__unicode2native__", "libunistring");
+#endif
+}
 DEFUN (list_in_columns, args, ,
        doc: /* -*- texinfo -*-
 @deftypefn {} {} list_in_columns (@var{arg}, @var{width}, @var{prefix})