diff libinterp/corefcn/strfns.cc @ 23123:c6ca5fe1505c

use wrappers for uniconv functions; style fixes for unicode conv fcns * bootstrap.conf (gnulib_modules): Include localcharset, uniconv/u8-conv-from-enc, and uniconv/u8-conv-to-enc in the list. Remove libunistring from the list. * liboctave/wrappers/localcharset-wrapper.c, liboctave/wrappers/localcharset-wrapper.h, liboctave/wrappers/uniconv-wrappers.c, liboctave/wrappers/uniconv-wrappers.h: New files. * liboctave/wrappers/module.mk: Update. * strfns.cc: Include uniconv-wrappers.h, not uniconv. Assume uniconv functions are available (provided by gnulib). (F__native2unicode__, F__unicode2native__): Use wrapper functions. Simplify. (F__unicode2native__): Return uint8 array. * native2unicode.m, unicode2native.m: Style fixes. Update tests. Use assert directly, not testif. Use error <> ..., not fail.
author John W. Eaton <jwe@octave.org>
date Tue, 31 Jan 2017 13:51:42 -0500
parents e310b5b6da6f
children 092078913d54
line wrap: on
line diff
--- a/libinterp/corefcn/strfns.cc	Sun Jan 22 13:58:57 2017 +0100
+++ b/libinterp/corefcn/strfns.cc	Tue Jan 31 13:51:42 2017 -0500
@@ -29,11 +29,9 @@
 #include <queue>
 #include <sstream>
 
-#ifdef HAVE_LIBUNISTRING
-#  include <uniconv.h>
-#endif
-
 #include "dMatrix.h"
+#include "localcharset-wrapper.h"
+#include "uniconv-wrappers.h"
 
 #include "Cell.h"
 #include "defun.h"
@@ -739,48 +737,47 @@
 @deftypefn {} {@var{utf8_str} =} __native2unicode__ (@var{native_bytes}, @var{codepage})
 Convert byte stream @var{native_bytes} to UTF-8 using @var{codepage}.
 
-
 @seealso{native2unicode, __unicode2native__}
 @end deftypefn */)
 {
-#ifdef HAVE_LIBUNISTRING
   int nargin = args.length ();
 
-  if (nargin < 1 || nargin > 2)
+  if (nargin != 2)
     print_usage ();
 
   if (args(0).is_string ())
-    return ovl(args(0));
+    return ovl (args(0));
 
-  // codepage
-  const char *codepage = locale_charset ();
-  string_vector tmp; 
-  if (! args(1).is_numeric_type ())
-    {
-      tmp = args(1).string_vector_value ();
-      codepage = tmp(0).c_str ();
-    }
+  std::string tmp = args(1).xstring_value ("CODEPAGE must be a string");
+  const char *codepage
+    = tmp.empty () ? octave_locale_charset_wrapper () : tmp.c_str ();
+
+  charNDArray native_bytes = args(0).char_array_value ();
 
-  // convert byte stream with local encoding to UTF-8
-  charNDArray native_bytes = args(0).char_array_value ();
+  const char *src = native_bytes.data ();
+  size_t srclen = native_bytes.numel ();
+
   size_t length;
-  char *utf8_str = reinterpret_cast<char *>
-                   (u8_conv_from_encoding (codepage, iconveh_question_mark,
-                                           native_bytes.fortran_vec (),
-                                           native_bytes.numel (), NULL,
-                                           NULL, &length));
-  if (utf8_str == NULL)
-    error("native2unicode: Error '%s' converting from codepage '%s' to UTF-8.",
-          std::strerror (errno), codepage);
+  uint8_t *utf8_str
+    = octave_u8_conv_from_encoding (codepage, src, srclen, &length);
 
-  std::string ret_val = std::string (utf8_str, length);
-  free (utf8_str);
-  return ovl (charNDArray (ret_val));
-#else
-  octave_unused_parameter (args);
+  if (! utf8_str)
+    error ("native2unicode: converting from codepage '%s' to UTF-8: %s",
+           codepage, std::strerror (errno));
+
+  // Register the cleanup only now: add_fcn is handed the pointer's current
+  // value, so registering before the conversion would free NULL and leak.
+  octave::unwind_protect frame;
+  frame.add_fcn (::free, static_cast<void *> (utf8_str));
+
+  octave_idx_type len = length;
 
-  err_disabled_feature ("__native2unicode__", "libunistring");
-#endif
+  charNDArray retval (dim_vector (1, len));
+
+  for (octave_idx_type i = 0; i < len; i++)
+    retval.xelem(i) = utf8_str[i];
+
+  return ovl (retval);
 }
 
 DEFUN (__unicode2native__, args, ,
@@ -789,44 +786,46 @@
 Convert UTF-8 string @var{utf8_str} to byte stream @var{native_bytes} using
 @var{codepage}.
 
-
 @seealso{unicode2native, __native2unicode__}
 @end deftypefn */)
 {
-#ifdef HAVE_LIBUNISTRING
   int nargin = args.length ();
 
   if (nargin != 2)
     print_usage ();
 
-  // codepage
-  const char *codepage = locale_charset ();
-  string_vector tmp; 
-  if (! args(1).is_numeric_type ())
-    {
-      tmp = args(1).string_vector_value ();
-      codepage = tmp(0).c_str ();
-    }
+  std::string tmp = args(1).xstring_value ("CODEPAGE must be a string");
+  const char *codepage
+    = tmp.empty () ? octave_locale_charset_wrapper () : tmp.c_str ();
 
-  // convert UTF-8 string vector to byte-stream with local encoding
-  charNDArray utf8_str = args(0).char_array_value ();
+  charNDArray utf8_str = args(0).xchar_array_value ("UTF8_STR must be a string");
+
+  const uint8_t *src = reinterpret_cast<const uint8_t *> (utf8_str.data ());
+  size_t srclen = utf8_str.numel ();
+
   size_t length;
-  char *native_bytes = u8_conv_to_encoding (codepage, iconveh_question_mark,
-                            reinterpret_cast<uint8_t*> (utf8_str.fortran_vec ()),
-                            utf8_str.numel (), NULL, NULL, &length);
-  if (native_bytes == NULL)
-    error("native2unicode: Error '%s' converting from UTF-8 to codepage '%s'.",
-          std::strerror (errno), codepage);
+  char *native_bytes
+    = octave_u8_conv_to_encoding (codepage, src, srclen, &length);
 
-  std::string ret_val = std::string (native_bytes, length);
-  free (native_bytes);
-  return ovl (NDArray (ret_val));
-#else
-  octave_unused_parameter (args);
+  if (! native_bytes)
+    error ("unicode2native: converting from UTF-8 to codepage '%s': %s",
+           codepage, std::strerror (errno));
+
+  // Register the cleanup only now: add_fcn is handed the pointer's current
+  // value, so registering before the conversion would free NULL and leak.
+  octave::unwind_protect frame;
+  frame.add_fcn (::free, static_cast<void *> (native_bytes));
+
+  octave_idx_type len = length;
 
-  err_disabled_feature ("__unicode2native__", "libunistring");
-#endif
+  uint8NDArray retval (dim_vector (1, len));
+
+  for (octave_idx_type i = 0; i < len; i++)
+    retval.xelem(i) = native_bytes[i];
+
+  return ovl (retval);
 }
+
 DEFUN (list_in_columns, args, ,
        doc: /* -*- texinfo -*-
 @deftypefn {} {} list_in_columns (@var{arg}, @var{width}, @var{prefix})