# HG changeset patch # User Markus Mützel # Date 1576176272 -3600 # Node ID 1626e50fa8eec7692d2e4a154d51aa96dc1952d7 # Parent 06af06188403d0ce30d366db3ac1fe29f87da987 Add type switch to "__u8_validate__". * strfns.cc (F__u8_validate__): Add additional input argument type to allow selecting the replacement mode. diff -r 06af06188403 -r 1626e50fa8ee libinterp/corefcn/strfns.cc --- a/libinterp/corefcn/strfns.cc Thu Dec 12 09:23:53 2019 -0800 +++ b/libinterp/corefcn/strfns.cc Thu Dec 12 19:44:32 2019 +0100 @@ -1044,23 +1044,37 @@ DEFUN (__u8_validate__, args, , doc: /* -*- texinfo -*- -@deftypefn {} {@var{out_str} =} __u8_validate__ (in_str) +@deftypefn {} {@var{out_str} =} __u8_validate__ (in_str, mode) Return string with valid UTF-8. -On encountering invalid UTF-8, the bytes are interpreted as the Unicode code -points U+0080–U+00FF with the same value as the byte, thus interpreting the -bytes according to ISO-8859-1. +On encountering invalid UTF-8 in @var{in_str}, the bytes are either replaced by +the replacement character "�" (if @var{mode} is omitted or the string +"replace") or interpreted as the Unicode code points U+0080–U+00FF with the +same value as the byte (if @var{mode} is the string "unicode"), thus +interpreting the bytes according to ISO-8859-1.
@end deftypefn */) { - if (args.length () != 1) + if (args.length () < 1 || args.length () > 2) print_usage (); // Input check std::string in_str = - args(0).xstring_value ("__u8_validate__: Not a string."); + args(0).xstring_value ("__u8_validate__: IN_STR must be a string."); + + std::string mode = "replace"; + if (args.length () > 1) + mode = args(1).xstring_value ("__u8_validate__: MODE must be a string."); - octave::string::u8_validate ("__u8_validate__", in_str); + octave::string::u8_fallback_type fb_type; + if (mode == "replace") + fb_type = octave::string::U8_REPLACEMENT_CHAR; + else if (mode == "unicode") + fb_type = octave::string::U8_ISO_8859_1; + else + error ("__u8_validate__: MODE must either be \"replace\" or \"unicode\"."); + + octave::string::u8_validate ("__u8_validate__", in_str, fb_type); return ovl (in_str); }