// Mercurial > octave

////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 1994-2021 The Octave Project Developers
//
// See the file COPYRIGHT.md in the top-level directory of this
// distribution or <https://octave.org/copyright/>.
//
// This file is part of Octave.
//
// Octave is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Octave is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Octave; see the file COPYING.  If not, see
// <https://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////

#if defined (HAVE_CONFIG_H)
#  include "config.h"
#endif

#include <cctype>

#include <queue>
#include <sstream>

#include "dMatrix.h"
#include "localcharset-wrapper.h"
#include "uniconv-wrappers.h"
#include "unistr-wrappers.h"

#include "Cell.h"
#include "defun.h"
#include "error.h"
#include "errwarn.h"
#include "ov.h"
#include "ovl.h"
#include "unwind-prot.h"
#include "utils.h"

#include "oct-string.h"

OCTAVE_NAMESPACE_BEGIN

DEFUN (char, args, ,
       doc: /* -*- texinfo -*-
@deftypefn  {} {} char (@var{x})
@deftypefnx {} {} char (@var{x}, @dots{})
@deftypefnx {} {} char (@var{s1}, @var{s2}, @dots{})
@deftypefnx {} {} char (@var{cell_array})
Create a string array from one or more numeric matrices, character
matrices, or cell arrays.

Arguments are concatenated vertically.  The returned values are padded with
blanks as needed to make each row of the string array have the same length.
Empty input strings are significant and will concatenated in the output.

For numerical input, each element is converted to the corresponding ASCII
character.  A range error results if an input is outside the ASCII range
(0-255).

For cell arrays, each element is concatenated separately.  Cell arrays
converted through @code{char} can mostly be converted back with
@code{cellstr}.  For example:

@example
@group
char ([97, 98, 99], "", @{"98", "99", 100@}, "str1", ["ha", "lf"])
   @result{} ["abc "
       "    "
       "98  "
       "99  "
       "d   "
       "str1"
       "half"]
@end group
@end example
@seealso{strvcat, cellstr}
@end deftypefn */)
{
  octave_value retval;

  int nargin = args.length ();

  if (nargin == 0)
    retval = "";
  else if (nargin == 1)
    retval = args(0).convert_to_str (true, true,
                                     args(0).is_dq_string () ? '"' : '\'');
  else
    {
      int n_elts = 0;

      int max_len = 0;

      std::queue<string_vector> args_as_strings;

      for (int i = 0; i < nargin; i++)
        {
          string_vector s = args(i).xstring_vector_value ("char: unable to convert some args to strings");

          if (s.numel () > 0)
            n_elts += s.numel ();
          else
            n_elts += 1;

          int s_max_len = s.max_length ();

          if (s_max_len > max_len)
            max_len = s_max_len;

          args_as_strings.push (s);
        }

      string_vector result (n_elts);

      int k = 0;

      for (int i = 0; i < nargin; i++)
        {
          string_vector s = args_as_strings.front ();
          args_as_strings.pop ();

          int n = s.numel ();

          if (n > 0)
            {
              for (int j = 0; j < n; j++)
                {
                  std::string t = s[j];
                  int t_len = t.length ();

                  if (max_len > t_len)
                    t += std::string (max_len - t_len, ' ');

                  result[k++] = t;
                }
            }
          else
            result[k++] = std::string (max_len, ' ');
        }

      retval = octave_value (result, '\'');
    }

  return retval;
}

/*
%!assert (char (), '')
%!assert (char (100), "d")
%!assert (char (100,100), ["d";"d"])
%!assert (char ({100,100}), ["d";"d"])
%!assert (char ([100,100]), ["dd"])
%!assert (char ({100,{100}}), ["d";"d"])
%!assert (char (100, [], 100), ["d";" ";"d"])
%!assert (char ({100, [], 100}), ["d";" ";"d"])
%!assert (char ({100,{100, {""}}}), ["d";"d";" "])
%!assert (char (["a ";"be"], {"c", 100}), ["a ";"be";"c ";"d "])
%!assert (char ("a", "bb", "ccc"), ["a  "; "bb "; "ccc"])
%!assert (char ([65, 83, 67, 73, 73]), "ASCII")

%!test
%! x = char ("foo", "bar", "foobar");
%! assert (x(1,:), "foo   ");
%! assert (x(2,:), "bar   ");
%! assert (x(3,:), "foobar");
*/

DEFUN (strvcat, args, ,
       doc: /* -*- texinfo -*-
@deftypefn  {} {} strvcat (@var{x})
@deftypefnx {} {} strvcat (@var{x}, @dots{})
@deftypefnx {} {} strvcat (@var{s1}, @var{s2}, @dots{})
@deftypefnx {} {} strvcat (@var{cell_array})
Create a character array from one or more numeric matrices, character
matrices, or cell arrays.

Arguments are concatenated vertically.  The returned values are padded with
blanks as needed to make each row of the string array have the same length.
Unlike @code{char}, empty strings are removed and will not appear in the
output.

For numerical input, each element is converted to the corresponding ASCII
character.  A range error results if an input is outside the ASCII range
(0-255).

For cell arrays, each element is concatenated separately.  Cell arrays
converted through @code{strvcat} can mostly be converted back with
@code{cellstr}.  For example:

@example
@group
strvcat ([97, 98, 99], "", @{"98", "99", 100@}, "str1", ["ha", "lf"])
      @result{} ["abc "
          "98  "
          "99  "
          "d   "
          "str1"
          "half"]
@end group
@end example
@seealso{char, strcat, cstrcat}
@end deftypefn */)
{
  const int nargin = args.length ();

  // Converted arguments, kept in call order for the second pass.
  std::queue<string_vector> converted;

  int n_rows = 0;
  std::size_t width = 0;

  // Pass 1: convert each argument, count the non-empty rows, and track
  // the widest row seen so far.
  for (int iarg = 0; iarg < nargin; iarg++)
    {
      string_vector sv = args(iarg).xstring_vector_value ("strvcat: unable to convert some args to strings");

      const std::size_t count = sv.numel ();

      // Empty rows are dropped from the output, so they are not counted.
      for (std::size_t row = 0; row < count; row++)
        if (! sv[row].empty ())
          n_rows++;

      const std::size_t widest = sv.max_length ();

      if (widest > width)
        width = widest;

      converted.push (sv);
    }

  string_vector result (n_rows);

  octave_idx_type out = 0;

  // Pass 2: copy every non-empty row, blank-padded to the common width.
  while (! converted.empty ())
    {
      string_vector sv = converted.front ();
      converted.pop ();

      const std::size_t count = sv.numel ();

      for (std::size_t row = 0; row < count; row++)
        {
          std::string padded = sv[row];

          if (padded.length () == 0)
            continue;

          const std::size_t len = padded.length ();

          if (width > len)
            padded += std::string (width - len, ' ');

          result[out++] = padded;
        }
    }

  // Cannot use ovl.  Relies on overloaded octave_value call.
  return octave_value (result, '\'');
}

/*
%!assert (strvcat (""), "")
%!assert (strvcat (100) == "d")
%!assert (strvcat (100,100), ["d";"d"])
%!assert (strvcat ({100,100}), ["d";"d"])
%!assert (strvcat ([100,100]), ["dd"])
%!assert (strvcat ({100,{100}}), ["d";"d"])
%!assert (strvcat (100, [], 100), ["d";"d"])
%!assert (strvcat ({100, [], 100}), ["d";"d"])
%!assert (strvcat ({100,{100, {""}}}), ["d";"d"])
%!assert (strvcat (["a ";"be"], {"c", 100}), ["a ";"be";"c ";"d "])
%!assert (strvcat ("a", "bb", "ccc"), ["a  "; "bb "; "ccc"])
%!assert (strvcat (), "")
*/

DEFUN (ischar, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {} ischar (@var{x})
Return true if @var{x} is a character array.
@seealso{isfloat, isinteger, islogical, isnumeric, isstring, iscellstr, isa}
@end deftypefn */)
{
  const int nargin = args.length ();

  // Exactly one input is accepted.
  if (nargin != 1)
    print_usage ();

  const bool is_char_array = args(0).is_string ();

  return ovl (is_char_array);
}

/*
%!assert (ischar ("a"), true)
%!assert (ischar (["ab";"cd"]), true)
%!assert (ischar ({"ab"}), false)
%!assert (ischar (1), false)
%!assert (ischar ([1, 2]), false)
%!assert (ischar ([]), false)
%!assert (ischar ([1, 2; 3, 4]), false)
%!assert (ischar (""), true)
%!assert (ischar ("test"), true)
%!assert (ischar (["test"; "ing"]), true)
%!assert (ischar (struct ("foo", "bar")), false)

%!error ischar ()
%!error ischar ("test", 1)
*/

// Shared implementation behind strcmp, strncmp, strcmpi and strncmpi.
//
// ARG0 and ARG1 are the two values to compare, N is handed unchanged to
// the comparison callbacks, and FCN_NAME is only used as the prefix of
// the "nonconformant cell arrays" error message.  ARRAY_OP compares two
// character arrays as a whole; STR_OP compares two individual strings.
//
// The result depends on the kinds of the inputs:
//
//   string, string : the single value returned by ARRAY_OP.
//   string, cell   : a logical array; a string with at most one row is
//                    compared with every cell element, a one-element
//                    cell is compared with every row of the string, and
//                    otherwise rows and cell elements are paired up (or
//                    a scalar false is returned if the counts differ).
//   cell, cell     : a logical array; a one-element cell is compared
//                    with every element of the other cell, otherwise
//                    the dimensions must agree or an error is raised.
//   anything else  : false.
//
// Cell elements that are not strings are never passed to STR_OP; their
// entries keep the initial value false.
static octave_value
do_strcmp_fun (const octave_value& arg0, const octave_value& arg1,
               octave_idx_type n, const char *fcn_name,
               bool (*array_op) (const Array<char>&, const Array<char>&,
                                 octave_idx_type),
               bool (*str_op) (const std::string&, const std::string&,
                               std::string::size_type))

{
  octave_value retval;

  bool s1_string = arg0.is_string ();
  bool s1_cell = arg0.iscell ();
  bool s2_string = arg1.is_string ();
  bool s2_cell = arg1.iscell ();

  if (s1_string && s2_string)
    retval = array_op (arg0.char_array_value (), arg1.char_array_value (), n);
  else if ((s1_string && s2_cell) || (s1_cell && s2_string))
    {
      // One string and one cell, in either order.  Sort them into
      // STR_VAL and CELL_VAL so that the code below handles both orders.
      octave_value str_val, cell_val;

      if (s1_string)
        {
          str_val = arg0;
          cell_val = arg1;
        }
      else
        {
          str_val = arg1;
          cell_val = arg0;
        }

      const Cell cell = cell_val.cell_value ();
      const string_vector str = str_val.string_vector_value ();
      // R is the number of elements in the string vector built from the
      // string argument.
      octave_idx_type r = str.numel ();

      if (r == 0 || r == 1)
        {
          // Broadcast the string.

          // The result has the dimensions of the cell array.
          boolNDArray output (cell_val.dims (), false);

          // A string vector without elements is compared as "".
          std::string s = (r == 0 ? "" : str[0]);

          if (cell_val.iscellstr ())
            {
              const Array<std::string> cellstr = cell_val.cellstr_value ();
              for (octave_idx_type i = 0; i < cellstr.numel (); i++)
                output(i) = str_op (cellstr(i), s, n);
            }
          else
            {
              // Mixed cell: only the string elements are compared.
              // FIXME: should we warn here?
              for (octave_idx_type i = 0; i < cell.numel (); i++)
                {
                  if (cell(i).is_string ())
                    output(i) = str_op (cell(i).string_value (), s, n);
                }
            }

          retval = output;
        }
      else if (r > 1)
        {
          if (cell.numel () == 1)
            {
              // Broadcast the cell.

              // The result is a column with one entry per string row.
              const dim_vector dv (r, 1);
              boolNDArray output (dv, false);

              if (cell(0).is_string ())
                {
                  const std::string str2 = cell(0).string_value ();

                  for (octave_idx_type i = 0; i < r; i++)
                    output(i) = str_op (str[i], str2, n);
                }

              retval = output;
            }
          else
            {
              // Must match in all dimensions.

              boolNDArray output (cell.dims (), false);

              if (cell.numel () == r)
                {
                  if (cell_val.iscellstr ())
                    {
                      const Array<std::string> cellstr
                        = cell_val.cellstr_value ();
                      for (octave_idx_type i = 0; i < cellstr.numel (); i++)
                        output(i) = str_op (str[i], cellstr(i), n);
                    }
                  else
                    {
                      // FIXME: should we warn here?
                      for (octave_idx_type i = 0; i < r; i++)
                        {
                          if (cell(i).is_string ())
                            output(i) = str_op (str[i],
                                                cell(i).string_value (), n);
                        }
                    }

                  retval = output;
                }
              else
                // Element counts differ: the result is a scalar false,
                // not an array.
                retval = false;
            }
        }
    }
  else if (s1_cell && s2_cell)
    {
      octave_value cell1_val, cell2_val;
      octave_idx_type r1 = arg0.numel (), r2;

      if (r1 == 1)
        {
          // Make the singleton cell2.

          cell1_val = arg1;
          cell2_val = arg0;
        }
      else
        {
          cell1_val = arg0;
          cell2_val = arg1;
        }

      const Cell cell1 = cell1_val.cell_value ();
      const Cell cell2 = cell2_val.cell_value ();
      // Re-read the counts after the possible swap above.
      r1 = cell1.numel ();
      r2 = cell2.numel ();

      const dim_vector size1 = cell1.dims ();
      const dim_vector size2 = cell2.dims ();

      // The result always has the dimensions of cell1.
      boolNDArray output (size1, false);

      if (r2 == 1)
        {
          // Broadcast cell2.

          if (cell2(0).is_string ())
            {
              const std::string str2 = cell2(0).string_value ();

              if (cell1_val.iscellstr ())
                {
                  const Array<std::string> cellstr = cell1_val.cellstr_value ();
                  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
                    output(i) = str_op (cellstr(i), str2, n);
                }
              else
                {
                  // FIXME: should we warn here?
                  for (octave_idx_type i = 0; i < r1; i++)
                    {
                      if (cell1(i).is_string ())
                        {
                          const std::string str1 = cell1(i).string_value ();
                          output(i) = str_op (str1, str2, n);
                        }
                    }
                }
            }
        }
      else
        {
          // Neither cell is a singleton: the dimensions must agree.
          if (size1 != size2)
            error ("%s: nonconformant cell arrays", fcn_name);

          if (cell1.iscellstr () && cell2.iscellstr ())
            {
              const Array<std::string> cellstr1 = cell1_val.cellstr_value ();
              const Array<std::string> cellstr2 = cell2_val.cellstr_value ();
              for (octave_idx_type i = 0; i < r1; i++)
                output (i) = str_op (cellstr1(i), cellstr2(i), n);
            }
          else
            {
              // Only pairs in which both elements are strings are
              // compared.
              // FIXME: should we warn here?
              for (octave_idx_type i = 0; i < r1; i++)
                {
                  if (cell1(i).is_string () && cell2(i).is_string ())
                    {
                      const std::string str1 = cell1(i).string_value ();
                      const std::string str2 = cell2(i).string_value ();
                      output(i) = str_op (str1, str2, n);
                    }
                }
            }
        }

      retval = output;
    }
  else
    // Any other combination of argument types compares as false.
    retval = false;

  return retval;
}


// These are required so that they match the same signature as strncmp
// and strncmpi and can therefore be used in do_strcmp_fun.

// Adapter around string::strcmp that accepts, and discards, a trailing
// length argument.
template <typename T, typename T_size_type>
static bool
strcmp_ignore_n (const T& s1, const T& s2, T_size_type /* unused */)
{
  return string::strcmp (s1, s2);
}

// Adapter around string::strcmpi that accepts, and discards, a trailing
// length argument.
template <typename T, typename T_size_type>
static bool
strcmpi_ignore_n (const T& s1, const T& s2, T_size_type /* unused */)
{
  return string::strcmpi (s1, s2);
}


DEFUN (strcmp, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {} strcmp (@var{s1}, @var{s2})
Return 1 if the character strings @var{s1} and @var{s2} are the same,
and 0 otherwise.

If either @var{s1} or @var{s2} is a cell array of strings, then an array
of the same size is returned, containing the values described above for
every member of the cell array.  The other argument may also be a cell
array of strings (of the same size or with only one element), char matrix
or character string.

@strong{Caution:} For compatibility with @sc{matlab}, Octave's strcmp
function returns 1 if the character strings are equal, and 0 otherwise.
This is just the opposite of the corresponding C library function.
@seealso{strcmpi, strncmp, strncmpi}
@end deftypefn */)
{
  const int nargin = args.length ();

  if (nargin != 2)
    print_usage ();

  // The length argument is a placeholder: the *_ignore_n adapters
  // discard it.
  const octave_value result
    = do_strcmp_fun (args(0), args(1), 0, "strcmp",
                     strcmp_ignore_n, strcmp_ignore_n);

  return ovl (result);
}

/*
%!shared x
%! x = char (zeros (0, 2));
%!assert (strcmp ("", x), false)
%!assert (strcmp (x, ""), false)
%!assert (strcmp (x, x), true)
## %!assert (strcmp ({""}, x), true)
## %!assert (strcmp ({x}, ""), false)
## %!assert (strcmp ({x}, x), true)
## %!assert (strcmp ("", {x}), false)
## %!assert (strcmp (x, {""}), false)
## %!assert (strcmp (x, {x}), true)
## %!assert (strcmp ({x; x}, ""), [false; false])
## %!assert (strcmp ({x; x}, {""}), [false; false])
## %!assert (strcmp ("", {x; x}), [false; false])
## %!assert (strcmp ({""}, {x; x}), [false; false])
%!assert (strcmp ({"foo"}, x), false)
%!assert (strcmp ({"foo"}, "foo"), true)
%!assert (strcmp ({"foo"}, x), false)
%!assert (strcmp (x, {"foo"}), false)
%!assert (strcmp ("foo", {"foo"}), true)
%!assert (strcmp (x, {"foo"}), false)
%!shared y
%! y = char (zeros (2, 0));
%!assert (strcmp ("", y), false)
%!assert (strcmp (y, ""), false)
%!assert (strcmp (y, y), true)
%!assert (strcmp ({""}, y), [true; true])
%!assert (strcmp ({y}, ""), true)
%!assert (strcmp ({y}, y), [true; true])
%!assert (strcmp ("", {y}), true)
%!assert (strcmp (y, {""}), [true; true])
%!assert (strcmp (y, {y}), [true; true])
%!assert (strcmp ({y; y}, ""), [true; true])
%!assert (strcmp ({y; y}, {""}), [true; true])
%!assert (strcmp ("", {y; y}), [true; true])
%!assert (strcmp ({""}, {y; y}), [true; true])
%!assert (strcmp ({"foo"}, y), [false; false])
%!assert (strcmp ({"foo"}, y), [false; false])
%!assert (strcmp (y, {"foo"}), [false; false])
%!assert (strcmp (y, {"foo"}), [false; false])
%!assert (strcmp ("foobar", "foobar"), true)
%!assert (strcmp ("foobar", "fooBar"), false)
%!assert (strcmp ("fooba", "foobar"), false)

%!error strcmp ()
%!error strcmp ("foo", "bar", 3)
*/

DEFUN (strncmp, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {} strncmp (@var{s1}, @var{s2}, @var{n})
Return 1 if the first @var{n} characters of strings @var{s1} and @var{s2}
are the same, and 0 otherwise.

@example
@group
strncmp ("abce", "abcd", 3)
      @result{} 1
@end group
@end example

If either @var{s1} or @var{s2} is a cell array of strings, then an array
of the same size is returned, containing the values described above for
every member of the cell array.  The other argument may also be a cell
array of strings (of the same size or with only one element), char matrix
or character string.

@example
@group
strncmp ("abce", @{"abcd", "bca", "abc"@}, 3)
     @result{} [1, 0, 1]
@end group
@end example

@strong{Caution:} For compatibility with @sc{matlab}, Octave's strncmp
function returns 1 if the character strings are equal, and 0 otherwise.
This is just the opposite of the corresponding C library function.
@seealso{strncmpi, strcmp, strcmpi}
@end deftypefn */)
{
  const int nargin = args.length ();

  if (nargin != 3)
    print_usage ();

  const octave_idx_type n = args(2).idx_type_value ();

  // Reject a non-positive character count before comparing anything.
  if (n <= 0)
    error ("strncmp: N must be greater than 0");

  return ovl (do_strcmp_fun (args(0), args(1), n, "strncmp",
                             string::strncmp,
                             string::strncmp));
}

/*
%!assert (strncmp ("abce", "abc", 3), true)
%!assert (strncmp ("abce", "aBc", 3), false)
%!assert (strncmp (100, 100, 1), false)
%!assert (strncmp ("abce", {"abcd", "bca", "abc"}, 3), logical ([1, 0, 1]))
%!assert (strncmp ("abc",  {"abcd", "bca", "abc"}, 4), logical ([0, 0, 1]))
%!assert (strncmp ({"abcd", "bca", "abc"},"abce", 3), logical ([1, 0, 1]))
%!assert (strncmp ({"abcd", "bca", "abc"},{"abcd", "bca", "abe"}, 3), logical ([1, 1, 0]))
%!assert (strncmp ("abc", {"abcd", 10}, 2), logical ([1, 0]))

%!assert <*54373> (strncmp ("abc", "abc", 100))

%!error strncmp ()
%!error strncmp ("abc", "def")
*/

DEFUNX ("strcmpi", Fstrcmpi, args, ,
        doc: /* -*- texinfo -*-
@deftypefn {} {} strcmpi (@var{s1}, @var{s2})
Return 1 if the character strings @var{s1} and @var{s2} are the same,
disregarding case of alphabetic characters, and 0 otherwise.

If either @var{s1} or @var{s2} is a cell array of strings, then an array
of the same size is returned, containing the values described above for
every member of the cell array.  The other argument may also be a cell
array of strings (of the same size or with only one element), char matrix
or character string.

@strong{Caution:} For compatibility with @sc{matlab}, Octave's strcmpi
function returns 1 if the character strings are equal, and 0 otherwise.
This is just the opposite of the corresponding C library function.

@strong{Caution:} National alphabets are not supported.
@seealso{strcmp, strncmp, strncmpi}
@end deftypefn */)
{
  const int nargin = args.length ();

  if (nargin != 2)
    print_usage ();

  // The length argument is a placeholder: the *_ignore_n adapters
  // discard it.
  const octave_value result
    = do_strcmp_fun (args(0), args(1), 0, "strcmpi",
                     strcmpi_ignore_n, strcmpi_ignore_n);

  return ovl (result);
}

/*
%!assert (strcmpi ("abc123", "ABC123"), true)
*/

DEFUNX ("strncmpi", Fstrncmpi, args, ,
        doc: /* -*- texinfo -*-
@deftypefn {} {} strncmpi (@var{s1}, @var{s2}, @var{n})
Return 1 if the first @var{n} characters of @var{s1} and @var{s2} are the
same, disregarding case of alphabetic characters, and 0 otherwise.

If either @var{s1} or @var{s2} is a cell array of strings, then an array
of the same size is returned, containing the values described above for
every member of the cell array.  The other argument may also be a cell
array of strings (of the same size or with only one element), char matrix
or character string.

@strong{Caution:} For compatibility with @sc{matlab}, Octave's strncmpi
function returns 1 if the character strings are equal, and 0 otherwise.
This is just the opposite of the corresponding C library function.

@strong{Caution:} National alphabets are not supported.
@seealso{strncmp, strcmp, strcmpi}
@end deftypefn */)
{
  const int nargin = args.length ();

  if (nargin != 3)
    print_usage ();

  const octave_idx_type n = args(2).idx_type_value ();

  // Reject a non-positive character count before comparing anything.
  if (n <= 0)
    error ("strncmpi: N must be greater than 0");

  return ovl (do_strcmp_fun (args(0), args(1), n, "strncmpi",
                             string::strncmpi,
                             string::strncmpi));
}

/*
%!assert (strncmpi ("abc123", "ABC456", 3), true)

%!assert <*54373> (strncmpi ("abc", "abC", 100))
*/

DEFUN (str2double, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {} str2double (@var{s})
Convert a string to a real or complex number.

The string must be in one of the following formats where a and b are real
numbers and the complex unit is @qcode{'i'} or @qcode{'j'}:

@itemize
@item a + bi

@item a + b*i

@item a + i*b

@item bi + a

@item b*i + a

@item i*b + a
@end itemize

If present, a and/or b are of the form @nospell{[+-]d[,.]d[[eE][+-]d]} where
the brackets indicate optional arguments and @qcode{'d'} indicates zero or
more digits.  The special input values @code{Inf}, @code{NaN}, and @code{NA}
are also accepted.

@var{s} may be a character string, character matrix, or cell array.  For
character arrays the conversion is repeated for every row, and a double or
complex array is returned.  Empty rows in @var{s} are deleted and not
returned in the numeric array.  For cell arrays each character string
element is processed and a double or complex array of the same dimensions as
@var{s} is returned.

For unconvertible scalar or character string input @code{str2double} returns
a NaN@.  Similarly, for character array input @code{str2double} returns a
NaN for any row of @var{s} that could not be converted.  For a cell array,
@code{str2double} returns a NaN for any element of @var{s} for which
conversion fails.  Note that numeric elements in a mixed string/numeric
cell array are not strings and the conversion will fail for these elements
and return NaN.

@code{str2double} can replace @code{str2num}, and it avoids the security
risk of using @code{eval} on unknown data.
@seealso{str2num}
@end deftypefn */)
{
  if (args.length () != 1)
    print_usage ();

  const octave_value& arg = args(0);

  octave_value retval;

  if (arg.is_string ())
    {
      const bool is_empty = (arg.rows () == 0 || arg.columns () == 0);
      const bool is_row_vector = (arg.rows () == 1 && arg.ndims () == 2);

      if (is_empty)
        {
          // Empty character input yields a single NaN.
          retval = Matrix (1, 1, numeric_limits<double>::NaN ());
        }
      else if (is_row_vector)
        {
          // A single row is converted as one string.
          retval = string::str2double (arg.string_value ());
        }
      else
        {
          // Anything else is converted one string-vector element at a
          // time.
          const string_vector rows = arg.string_vector_value ();

          retval = rows.map<Complex> (string::str2double);
        }
    }
  else if (arg.iscell ())
    {
      const Cell elements = arg.cell_value ();

      // Start from all NaN; only string elements are overwritten.
      ComplexNDArray converted (elements.dims (),
                                numeric_limits<double>::NaN ());

      const octave_idx_type count = elements.numel ();

      for (octave_idx_type idx = 0; idx < count; idx++)
        {
          if (! elements(idx).is_string ())
            continue;

          converted(idx) = string::str2double (elements(idx).string_value ());
        }

      retval = converted;
    }
  else
    {
      // Input that is neither a string nor a cell yields a single NaN.
      retval = Matrix (1, 1, numeric_limits<double>::NaN ());
    }

  return retval;
}

/*
%!assert (str2double ("1"), 1)
%!assert (str2double ("-.1e-5"), -1e-6)
%!testif ; ! ismac ()
%! assert (str2double (char ("1", "2 3", "4i")), [1; NaN; 4i]);
%!test <47413>
%! ## Same test code as above, but intended only for test statistics on Mac.
%! if (! ismac ()), return; endif
%! assert (str2double (char ("1", "2 3", "4i")), [1; NaN; 4i]);
%!assert (str2double ("1,222.5"), 1222.5)
%!assert (str2double ("i"), i)
%!assert (str2double ("2j"), 2i)
%!assert (str2double ("2 + j"), 2+j)
%!assert (str2double ("i*2 + 3"), 3+2i)
%!assert (str2double (".5*i + 3.5"), 3.5+0.5i)
%!assert (str2double ("1e-3 + i*.25"), 1e-3 + 0.25i)
%!assert (str2double (char ("2 + j","1.25e-3","-05")), [2+i; 1.25e-3; -5])
%!assert (str2double ({"2 + j","1.25e-3","-05"}), [2+i, 1.25e-3, -5])
%!assert (str2double (1), NaN)
%!assert (str2double ("1 2 3 4"), NaN)
%!assert (str2double ("Hello World"), NaN)
%!assert (str2double ("NaN"), NaN)
%!assert (str2double ("NA"), NA)
%!assert (str2double ("Inf"), Inf)
%!assert (str2double ("iNF"), Inf)
%!assert (str2double ("-Inf"), -Inf)
%!assert (str2double ("Inf*i"), complex (0, Inf))
%!assert (str2double ("iNF*i"), complex (0, Inf))
%!assert (str2double ("NaN + Inf*i"), complex (NaN, Inf))
%!assert (str2double ("Inf - Inf*i"), complex (Inf, -Inf))
%!assert (str2double ("-i*NaN - Inf"), complex (-Inf, -NaN))
%!testif ; ! ismac ()
%! assert (str2double ({"abc", "4i"}), [NaN + 0i, 4i]);
%!test <47413>
%! if (! ismac ()), return; endif
%! assert (str2double ({"abc", "4i"}), [NaN + 0i, 4i]);
%!testif ; ! ismac ()
%! assert (str2double ({2, "4i"}), [NaN + 0i, 4i])
%!test <47413>
%! if (! ismac ()), return; endif
%! assert (str2double ({2, "4i"}), [NaN + 0i, 4i])
%!assert (str2double (zeros (3,1,2)), NaN)
%!assert (str2double (''), NaN)
%!assert (str2double ([]), NaN)
%!assert (str2double (char (zeros (3,0))), NaN)
*/

DEFUN (__native2unicode__, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {@var{utf8_str} =} __native2unicode__ (@var{native_bytes}, @var{codepage})
Convert byte stream @var{native_bytes} to UTF-8 using @var{codepage}.

@seealso{native2unicode, __unicode2native__}
@end deftypefn */)
{
  // Both arguments are indexed unconditionally below, so reject any other
  // argument count before touching ARGS.
  if (args.length () != 2)
    print_usage ();

  // Character input is handed back unchanged.
  if (args(0).is_string ())
    return ovl (args(0));

  // An empty codepage name selects the charset reported by
  // octave_locale_charset_wrapper.
  std::string tmp = args(1).string_value ();
  const char *codepage
    = (tmp.empty () ? octave_locale_charset_wrapper () : tmp.c_str ());

  charNDArray native_bytes = args(0).char_array_value ();

  const char *src = native_bytes.data ();
  std::size_t srclen = native_bytes.numel ();

  std::size_t length;
  uint8_t *utf8_str = nullptr;

  utf8_str = octave_u8_conv_from_encoding (codepage, src, srclen, &length);

  // A null result signals failure; errno tells the two cases apart.
  if (! utf8_str)
    {
      if (errno == ENOSYS)
        error ("native2unicode: iconv() is not supported.  Installing GNU "
               "libiconv and then re-compiling Octave could fix this.");
      else
        error ("native2unicode: converting from codepage '%s' to UTF-8: %s",
               codepage, std::strerror (errno));
    }

  // Release the converted buffer on every exit path from here on.
  unwind_action free_utf8_str ([=] () { ::free (utf8_str); });

  octave_idx_type len = length;

  // The result is a 1-by-LEN character row holding the converted bytes.
  charNDArray retval (dim_vector (1, len));

  for (octave_idx_type i = 0; i < len; i++)
    retval.xelem (i) = utf8_str[i];

  return ovl (retval);
}

DEFUN (__unicode2native__, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {@var{native_bytes} =} __unicode2native__ (@var{utf8_str}, @var{codepage})
Convert UTF-8 string @var{utf8_str} to byte stream @var{native_bytes} using
@var{codepage}.

@seealso{unicode2native, __native2unicode__}
@end deftypefn */)
{
  // Both arguments are indexed unconditionally below, so reject any other
  // argument count before touching ARGS.
  if (args.length () != 2)
    print_usage ();

  // An empty codepage name selects the charset reported by
  // octave_locale_charset_wrapper.
  std::string tmp = args(1).string_value ();
  const char *codepage
    = (tmp.empty () ? octave_locale_charset_wrapper () : tmp.c_str ());

  charNDArray utf8_str = args(0).char_array_value ();

  const uint8_t *src = reinterpret_cast<const uint8_t *> (utf8_str.data ());
  std::size_t srclen = utf8_str.numel ();

  std::size_t length;
  char *native_bytes = nullptr;

  native_bytes = octave_u8_conv_to_encoding (codepage, src, srclen, &length);

  // A null result signals failure; errno tells the two cases apart.
  if (! native_bytes)
    {
      if (errno == ENOSYS)
        error ("unicode2native: iconv() is not supported.  Installing GNU "
               "libiconv and then re-compiling Octave could fix this.");
      else
        error ("unicode2native: converting from UTF-8 to codepage '%s': %s",
               codepage, std::strerror (errno));
    }

  // Release the converted buffer on every exit path from here on.
  unwind_action free_native_bytes ([=] () { ::free (native_bytes); });

  octave_idx_type len = length;

  // The result is a 1-by-LEN uint8 row holding the converted bytes.
  uint8NDArray retval (dim_vector (1, len));

  for (octave_idx_type i = 0; i < len; i++)
    retval.xelem (i) = native_bytes[i];

  return ovl (retval);
}

DEFUN (__locale_charset__, , ,
       doc: /* -*- texinfo -*-
@deftypefn {} {@var{charset} =} __locale_charset__ ()
Return the identifier for the charset used if the encoding is set to
@qcode{"locale"}.
@end deftypefn */)
{
  // Copy the C string handed back by the wrapper into a std::string and
  // return it as the single output.
  return ovl (std::string (octave_locale_charset_wrapper ()));
}

DEFUN (unicode_idx, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {@var{idx} =} unicode_idx (@var{str})
Return an array with the indices for each UTF-8 encoded character in @var{str}.

@example
@group
unicode_idx ("aäbc")
     @result{} [1, 2, 2, 3, 4]
@end group
@end example

@end deftypefn */)
{
  if (args.length () != 1)
    print_usage ();

  charNDArray str = args(0).xchar_array_value ("STR must be a string");

  // Build a permutation that swaps the first two dimensions, so that the
  // bytes of each row become contiguous in STR_P.
  Array<octave_idx_type> p (dim_vector (str.ndims (), 1));
  charNDArray str_p;
  if (str.ndims () > 1)
    {
      for (octave_idx_type i=0; i < str.ndims (); i++)
        p(i) = i;
      p(0) = 1;
      p(1) = 0;
      str_p = str.permute (p);
    }

  const uint8_t *src = reinterpret_cast<const uint8_t *> (str_p.data ());
  octave_idx_type srclen = str.numel ();

  NDArray idx (str_p.dims ());

  // Walk the bytes one character at a time; every byte of a character is
  // labelled with that character's 1-based number.
  octave_idx_type u8_char_num = 1;
  for (octave_idx_type i = 0; i < srclen; u8_char_num++)
    {
      octave_idx_type mblen = octave_u8_strmblen_wrapper (src + i);

      // Invalid bytes (and NUL) are counted as one-byte characters.
      if (mblen < 1)
        mblen = 1;

      // Never let a character extend past the end of the data, so that
      // the writes into IDX below stay in bounds even if the reported
      // length of the final character exceeds the remaining bytes.
      // NOTE(review): this guards only the writes; whether the wrapper
      // itself may inspect bytes past the buffer for a truncated trailing
      // sequence should be confirmed against its documentation.
      if (mblen > srclen - i)
        mblen = srclen - i;

      for (octave_idx_type j = 0; j < mblen; j++)
        idx(i+j) = u8_char_num;
      i += mblen;
    }

  // Undo the permutation so that the result has the layout of STR.
  return ovl (str.ndims () > 1 ? idx.permute (p, true) : idx);
}

/*
%!assert (unicode_idx (["aäou"; "Ä∞"]), [1 2 2 3 4; 5 5 6 6 6])
*/

DEFUN (__unicode_length__, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {@var{len} =} __unicode_length__ (@var{str})
Return number of Unicode code points in @var{str}.

The input @var{str} must be a UTF-8 encoded character vector or cell string.

@example
@group
length ("aäbc")
     @result{} 5
__unicode_length__ ("aäbc")
     @result{} 4
@end group
@end example

@end deftypefn */)
{
  if (args.length () != 1)
    print_usage ();

  const octave_value& arg = args(0);

  if (arg.is_char_matrix ())
    {
      // Character array: swap the first two dimensions, then count over
      // all of the bytes at once.
      charNDArray chars = arg.char_array_value ();

      const octave_idx_type nd = chars.ndims ();
      Array<octave_idx_type> perm (dim_vector (nd, 1));

      if (nd > 1)
        {
          for (octave_idx_type d = 0; d < nd; d++)
            perm(d) = d;
          perm(0) = 1;
          perm(1) = 0;
          chars = chars.permute (perm);
        }

      const uint8_t *bytes = reinterpret_cast<const uint8_t *> (chars.data ());
      const octave_idx_type n_chars
        = octave_u8_mbsnlen_wrapper (bytes, chars.numel ());

      return ovl (n_chars);
    }

  if (! arg.iscellstr ())
    error ("STR must be a character array or cell string.");

  // Cell string: one count per element, in an array shaped like the
  // input.
  const Array<std::string> strings = arg.cellstr_value ();
  NDArray counts (arg.dims (), 0.0);

  const octave_idx_type n_strings = strings.numel ();

  for (octave_idx_type k = 0; k < n_strings; k++)
    {
      const uint8_t *bytes
        = reinterpret_cast<const uint8_t *> (strings(k).c_str ());
      counts(k) = octave_u8_mbsnlen_wrapper (bytes, strings(k).size ());
    }

  return ovl (counts);
}

/*
%!assert (__unicode_length__ (""), 0)
%!assert (__unicode_length__ ("aäbc"), 4)
%!assert (__unicode_length__ (["aä"; "öo"]), 4)
%!assert (__unicode_length__ ({"aäbc", "abc"}), [4, 3])
*/

DEFUN (__u8_validate__, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {@var{out_str} =} __u8_validate__ (in_str, mode)
Return string with valid UTF-8.

On encountering invalid UTF-8 in @var{in_str}, the bytes are either replaced by
the replacement character @qcode{"�"} (if @var{mode} is omitted or is the
string @qcode{"replace"}) or interpreted as the Unicode code points
U+0080–U+00FF with the same value as the byte (if @var{mode} is the string
@qcode{"unicode"}), thus interpreting the bytes according to ISO-8859-1.
@end deftypefn */)
{
  const int nargin = args.length ();

  if (nargin < 1 || nargin > 2)
    print_usage ();

  // The text to validate; it is modified in place further down.
  std::string in_str
    = args(0).xstring_value ("__u8_validate__: IN_STR must be a string");

  // MODE is optional and falls back to "replace" when it is not given.
  const std::string mode
    = (nargin == 2
       ? args(1).xstring_value ("__u8_validate__: MODE must be a string")
       : std::string ("replace"));

  // Translate the mode name into the corresponding fallback strategy.
  string::u8_fallback_type fb_type = string::U8_REPLACEMENT_CHAR;
  if (mode == "unicode")
    fb_type = string::U8_ISO_8859_1;
  else if (mode != "replace")
    error (R"(__u8_validate__: MODE must be either "replace" or "unicode")");

  string::u8_validate ("__u8_validate__", in_str, fb_type);

  return ovl (in_str);
}

DEFUN (newline, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {} newline
Return the character corresponding to a newline.

This is equivalent to @qcode{"@backslashchar{}n"}.

Example Code

@example
@group
joined_string = [newline "line1" newline "line2"]
@result{}
line1
line2
@end group
@end example

@seealso{strcat, strjoin, strsplit}
@end deftypefn */)
{
  // This function accepts no input arguments at all.
  if (args.length () > 0)
    print_usage ();

  // Built on the first call and then reused by every later call.
  static octave_value_list newline_value = ovl ("\n");

  return newline_value;
}

/*
%!assert (newline (), "\n")

%!error newline (1)
## FIXME: The next error() test requires a semicolon at EOL until
##        bug #59265 is resolved.
%!error [a, b] = newline ();
*/

DEFUN (list_in_columns, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {} list_in_columns (@var{arg}, @var{width}, @var{prefix})
Return a string containing the elements of @var{arg} listed in columns with
an overall maximum width of @var{width} and optional prefix @var{prefix}.

The argument @var{arg} must be a cell array of character strings or a
character array.

If @var{width} is not specified or is an empty matrix, or less than or equal
to zero, the width of the terminal screen is used.  Newline characters are
used to break the lines in the output string.  For example:
@c Set example in small font to prevent overfull line

@smallexample
@group
list_in_columns (@{"abc", "def", "ghijkl", "mnop", "qrs", "tuv"@}, 20)
     @result{} abc     mnop
        def     qrs
        ghijkl  tuv

whos ans
     @result{}
     Variables in the current scope:

       Attr Name        Size                     Bytes  Class
       ==== ====        ====                     =====  =====
            ans         1x37                        37  char

     Total is 37 elements using 37 bytes
@end group
@end smallexample

@seealso{terminal_size}
@end deftypefn */)
{
  const int nargin = args.length ();

  if (nargin < 1 || nargin > 3)
    print_usage ();

  // Arguments are decoded in order (ARG, WIDTH, PREFIX) so that the first
  // invalid one is the one that gets reported.
  string_vector items = args(0).xstring_vector_value ("list_in_columns: ARG must be a cellstr or char array");

  // A missing or empty WIDTH is passed on as -1.
  const bool width_given = (nargin >= 2 && ! args(1).isempty ());
  const int width
    = (width_given
       ? args(1).xint_value ("list_in_columns: WIDTH must be an integer")
       : -1);

  // PREFIX defaults to the empty string.
  const std::string prefix
    = (nargin == 3
       ? args(2).xstring_value ("list_in_columns: PREFIX must be a string")
       : std::string ());

  // Format into a string stream and return the collected text.
  std::ostringstream formatted;
  items.list_in_columns (formatted, width, prefix);

  return ovl (formatted.str ());
}

/*
%!test
%! input  = {"abc", "def", "ghijkl", "mnop", "qrs", "tuv"};
%! result = "abc     mnop\ndef     qrs\nghijkl  tuv\n";
%! assert (list_in_columns (input, 20), result);
%!test
%! input  = char ("abc", "def", "ghijkl", "mnop", "qrs", "tuv");
%! result = "abc     mnop  \ndef     qrs   \nghijkl  tuv   \n";
%! assert (list_in_columns (input, 20), result);
%!test
%! input  = char ("abc", "def", "ghijkl", "mnop", "qrs", "tuv");
%! result = "  abc     mnop  \n  def     qrs   \n  ghijkl  tuv   \n";
%! assert (list_in_columns (input, 20, "  "), result);

%!error list_in_columns ()
%!error list_in_columns (["abc", "def"], 20, 2)
%!error list_in_columns (["abc", "def"], 20, "  ", 3)
%!error <list_in_columns: WIDTH must be an integer> list_in_columns (["abc", "def"], "a")
*/

OCTAVE_NAMESPACE_END
author	John W. Eaton <jwe@octave.org>
date	Tue, 21 Sep 2021 13:32:41 -0400
parents	7d6709900da7
children	91c6288781ba a61e1a0f6024