changeset 27812:1626e50fa8ee

Add type switch to "__u8_validate__". * strfns.cc (F__u8_validate__): Add additional input argument type to allow selecting the replacement mode.
author Markus Mützel <markus.muetzel@gmx.de>
date Thu, 12 Dec 2019 19:44:32 +0100
parents 06af06188403
children d7dfab7045d9
files libinterp/corefcn/strfns.cc
diffstat 1 files changed, 21 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/strfns.cc	Thu Dec 12 09:23:53 2019 -0800
+++ b/libinterp/corefcn/strfns.cc	Thu Dec 12 19:44:32 2019 +0100
@@ -1044,23 +1044,37 @@
 
 DEFUN (__u8_validate__, args, ,
        doc: /* -*- texinfo -*-
-@deftypefn {} {@var{out_str} =} __u8_validate__ (in_str)
+@deftypefn {} {@var{out_str} =} __u8_validate__ (@var{in_str}, @var{mode})
 Return string with valid UTF-8.
 
-On encountering invalid UTF-8, the bytes are interpreted as the Unicode code
-points U+0080–U+00FF with the same value as the byte, thus interpreting the
-bytes according to ISO-8859-1.
+On encountering invalid UTF-8 in @var{in_str}, the bytes are either replaced by
+the replacement character @qcode{"�"} (if @var{mode} is omitted or is the
+string @qcode{"replace"}) or interpreted as the Unicode code points
+U+0080–U+00FF with the same value as the byte (if @var{mode} is the string
+@qcode{"unicode"}), thus interpreting the bytes according to ISO-8859-1.
 
 @end deftypefn */)
 {
-  if (args.length () != 1)
+  if (args.length () < 1 || args.length () > 2)
     print_usage ();
 
   // Input check
   std::string in_str =
-      args(0).xstring_value ("__u8_validate__: Not a string.");
+      args(0).xstring_value ("__u8_validate__: IN_STR must be a string.");
+
+  std::string mode = "replace";
+  if (args.length () > 1)
+    mode = args(1).xstring_value ("__u8_validate__: MODE must be a string.");
 
-  octave::string::u8_validate ("__u8_validate__", in_str);
+  octave::string::u8_fallback_type fb_type;
+  if (mode == "replace")
+    fb_type = octave::string::U8_REPLACEMENT_CHAR;
+  else if (mode == "unicode")
+    fb_type = octave::string::U8_ISO_8859_1;
+  else
+    error ("__u8_validate__: MODE must either be \"replace\" or \"unicode\".");
+
+  octave::string::u8_validate ("__u8_validate__", in_str, fb_type);
 
   return ovl (in_str);
 }