changeset 23356:ef20eee0247d

allow hex2num to handle integer values * hex2num.cc: Handle integer types. Simplify using templates and generic functions. Allow num2hex to return a cell array. Ensure hex digits are always created and processed in big-endian order. New tests. * NEWS: Note changes.
author John W. Eaton <jwe@octave.org>
date Thu, 06 Apr 2017 13:18:51 -0400
parents 08db4e9dd149
children 426b593b4b6b
files NEWS libinterp/corefcn/hex2num.cc
diffstat 2 files changed, 259 insertions(+), 171 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Thu Apr 06 13:12:09 2017 -0400
+++ b/NEWS	Thu Apr 06 13:18:51 2017 -0400
@@ -19,6 +19,13 @@
     storing and retrieving values by name, rather than by position which
     is how arrays work.
 
+ ** hex2num and num2hex now work for integer and char types and num2hex
+    may optionally return a cell array of strings instead of a character
+    array.  If given a cell array of strings, hex2num now returns a
+    numeric array of the same size as the input cell array.  Previously,
+    hex2num would accept a cell array of strings of arbitrary dimension
+    but would always return a column vector.
+
  ** The "names" option used in regular expressions now returns a struct
     array, rather than a struct with a cell array for each field.  This
     change was made for Matlab compatibility.
--- a/libinterp/corefcn/hex2num.cc	Thu Apr 06 13:12:09 2017 -0400
+++ b/libinterp/corefcn/hex2num.cc	Thu Apr 06 13:18:51 2017 -0400
@@ -1,5 +1,6 @@
 /*
 
+Copyright (C) 2017 John W. Eaton
 Copyright (C) 2008-2017 David Bateman
 
 This file is part of Octave.
@@ -24,23 +25,96 @@
 #  include "config.h"
 #endif
 
-#include <algorithm>
-
 #include "defun.h"
 #include "error.h"
 #include "errwarn.h"
+#include "mach-info.h"
+#include "ov.h"
 #include "ovl.h"
 #include "utils.h"
 
+static inline bool
+is_little_endian (bool is_float)
+{
+  bool fp_le = is_float && (octave::mach_info::native_float_format ()
+                            == octave::mach_info::flt_fmt_ieee_little_endian);
+  return fp_le || octave::mach_info::words_little_endian ();  // else word order
+}
+
+// Convert the hexadecimal digit CH to its numeric value (0-15).  Any
+// character that is not a hex digit is reported through error ().
+static uint8_t
+hex2nibble (unsigned char ch)
+{
+  if (! isxdigit (ch))
+    error ("hex2num: invalid character '%c' found in string S", ch);
+
+  if (ch >= 'a')
+    return static_cast<unsigned char> (ch - 'a' + 10);
+
+  if (ch >= 'A')
+    return static_cast<unsigned char> (ch - 'A' + 10);
+
+  // Assuming error () does not return, only decimal digits reach here.
+  return static_cast<unsigned char> (ch - '0');
+}
+
+// Decode up to 2*NBYTES hex digits from HEX into the NBYTES bytes at NUM,
+// padding short strings on the right with '0'; SWAP_BYTES reverses bytes.
+static void
+hex2num (const std::string& hex, void *num, size_t nbytes, bool swap_bytes)
+{
+  unsigned char *cp = reinterpret_cast<unsigned char *> (num);
+  const size_t nc = hex.length ();
+  const size_t nchars = 2 * nbytes;
+
+  // NCHARS is a size_t; convert it explicitly to match the %d conversion.
+  if (nc > nchars)
+    error ("hex2num: S must be no more than %d characters",
+           static_cast<int> (nchars));
+
+  size_t j = 0;
+  for (size_t i = 0; i < nbytes; i++)
+    {
+      unsigned char ch1 = (j < nc) ? hex[j++] : '0';
+      unsigned char ch2 = (j < nc) ? hex[j++] : '0';
+      cp[swap_bytes ? nbytes - i - 1 : i]
+        = (hex2nibble (ch1) << 4) + hex2nibble (ch2);
+    }
+}
+
+// Convert every string in VAL to a value of type T by decoding its hex
+// digits into the bytes of that value.  The result has VAL's dimensions.
+template <typename T>
+Array<T>
+hex2num (const Array<std::string>& val, bool swap_bytes)
+{
+  const size_t value_size = sizeof (T);
+  const octave_idx_type count = val.numel ();
+
+  Array<T> result (val.dims ());
+
+  for (octave_idx_type idx = 0; idx < count; idx++)
+    {
+      // Decode into a scratch value first, then store it in the result.
+      T decoded;
+      hex2num (val.xelem (idx), &decoded, value_size, swap_bytes);
+      result(idx) = decoded;
+    }
+
+  return result;
+}
+
 DEFUN (hex2num, args, ,
        doc: /* -*- texinfo -*-
 @deftypefn  {} {@var{n} =} hex2num (@var{s})
 @deftypefnx {} {@var{n} =} hex2num (@var{s}, @var{class})
-Typecast the 16 character hexadecimal character string to an IEEE 754
-double precision number.
+Typecast a hexadecimal character array or cell array of strings to an
+array of numbers.
 
-If fewer than 16 characters are given the strings are right padded with
-@qcode{'0'} characters.
+By default, the input array is interpreted as a hexadecimal number
+representing a double precision value.  If fewer than 16 characters are
+given the strings are right padded with @qcode{'0'} characters.
 
 Given a string matrix, @code{hex2num} treats each row as a separate number.
 
@@ -51,10 +125,25 @@
 @end group
 @end example
 
-The optional argument @var{class} can be passed as the string
-@qcode{"single"} to specify that the given string should be interpreted as
-a single precision number.  In this case, @var{s} should be an 8 character
-hexadecimal string.  For example:
+The optional second argument @var{class} may be used to cause the input
+array to be interpreted as a different value type.  Possible values are
+
+@multitable {Option} {Characters}
+@headitem Option @tab Characters
+@item @qcode{"int8"} @tab 2
+@item @qcode{"uint8"} @tab 2
+@item @qcode{"int16"} @tab 4
+@item @qcode{"uint16"} @tab 4
+@item @qcode{"int32"} @tab 8
+@item @qcode{"uint32"} @tab 8
+@item @qcode{"int64"} @tab 16
+@item @qcode{"uint64"} @tab 16
+@item @qcode{"char"} @tab 2
+@item @qcode{"single"} @tab 8
+@item @qcode{"double"} @tab 16
+@end multitable
+
+For example:
 
 @example
 @group
@@ -65,104 +154,47 @@
 @seealso{num2hex, hex2dec, dec2hex}
 @end deftypefn */)
 {
+  octave_value retval;
+
   int nargin = args.length ();
 
   if (nargin < 1 || nargin > 2)
     print_usage ();
 
-  if (nargin == 2 && ! args(1).is_string ())
-    error ("hex2num: CLASS must be a string");
-
-  const charMatrix cmat = args(0).char_matrix_value ();
-  std::string prec = (nargin == 2) ? args(1).string_value () : "double";
-  bool is_single = (prec == "single");
-  octave_idx_type nchars = (is_single) ? 8 : 16;
-
-  if (cmat.columns () > nchars)
-    error ("hex2num: S must be no more than %d characters", nchars);
-  else if (prec != "double" && prec != "single")
-    error ("hex2num: CLASS must be either \"double\" or \"single\"");
-
-  octave_value retval;
-  octave_idx_type nr = cmat.rows ();
-  octave_idx_type nc = cmat.columns ();
-
-  if (is_single)
-    {
-      FloatColumnVector m (nr);
+  std::string type = "double";
+  if (nargin == 2)
+    type = args(1).xstring_value ("hex2num: CLASS must be a string");
 
-      for (octave_idx_type i = 0; i < nr; i++)
-        {
-          union
-          {
-            uint32_t ival;
-            float dval;
-          } num;
-
-          num.ival = 0;
+  Array<std::string> val = args(0).cellstr_value ();
 
-          for (octave_idx_type j = 0; j < nc; j++)
-            {
-              unsigned char ch = cmat.elem (i, j);
-
-              if (! isxdigit (ch))
-                error ("hex2num: illegal character found in string S");
-
-              num.ival <<= 4;
-              if (ch >= 'a')
-                num.ival += static_cast<uint32_t> (ch - 'a' + 10);
-              else if (ch >= 'A')
-                num.ival += static_cast<uint32_t> (ch - 'A' + 10);
-              else
-                num.ival += static_cast<uint32_t> (ch - '0');
-            }
+  // We always use big-endian order for hex digits.
+  bool is_float = type == "single" || type == "double";
+  bool swap_bytes = is_little_endian (is_float);
 
-          if (nc < nchars)
-            num.ival <<= (nchars - nc) * 4;
-
-          m(i) = num.dval;
-        }
-
-      retval = m;
-    }
+  if (type == "int8")
+    retval = octave_value (hex2num<octave_int8> (val, swap_bytes));
+  else if (type == "uint8")
+    retval = octave_value (hex2num<octave_uint8> (val, swap_bytes));
+  else if (type == "int16")
+    retval = octave_value (hex2num<octave_int16> (val, swap_bytes));
+  else if (type == "uint16")
+    retval = octave_value (hex2num<octave_uint16> (val, swap_bytes));
+  else if (type == "int32")
+    retval = octave_value (hex2num<octave_int32> (val, swap_bytes));
+  else if (type == "uint32")
+    retval = octave_value (hex2num<octave_uint32> (val, swap_bytes));
+  else if (type == "int64")
+    retval = octave_value (hex2num<octave_int64> (val, swap_bytes));
+  else if (type == "uint64")
+    retval = octave_value (hex2num<octave_uint64> (val, swap_bytes));
+  else if (type == "char")
+    retval = octave_value (hex2num<char> (val, swap_bytes));
+  else if (type == "single")
+    retval = octave_value (hex2num<float> (val, swap_bytes));
+  else if (type == "double")
+    retval = octave_value (hex2num<double> (val, swap_bytes));
   else
-    {
-      ColumnVector m (nr);
-
-      for (octave_idx_type i = 0; i < nr; i++)
-        {
-          union
-          {
-            uint64_t ival;
-            double dval;
-          } num;
-
-          num.ival = 0;
-
-          for (octave_idx_type j = 0; j < nc; j++)
-            {
-              unsigned char ch = cmat.elem (i, j);
-
-              if (! isxdigit (ch))
-                error ("hex2num: illegal character found in string S");
-
-              num.ival <<= 4;
-              if (ch >= 'a')
-                num.ival += static_cast<uint64_t> (ch - 'a' + 10);
-              else if (ch >= 'A')
-                num.ival += static_cast<uint64_t> (ch - 'A' + 10);
-              else
-                num.ival += static_cast<uint64_t> (ch - '0');
-            }
-
-          if (nc < nchars)
-            num.ival <<= (nchars - nc) * 4;
-
-          m(i) = num.dval;
-        }
-
-      retval = m;
-    }
+    error ("hex2num: unrecognized CLASS '%s'", type.c_str ());
 
   return retval;
 }
@@ -170,13 +202,72 @@
 /*
 %!assert (hex2num (["c00";"bff";"000";"3ff";"400"]), [-2:2]')
 %!assert (hex2num (["c00";"bf8";"000";"3f8";"400"], "single"), single([-2:2])')
+%!assert (hex2num ("ff", "uint8"), intmax ("uint8"))
+%!assert (hex2num ("ffff", "uint16"), intmax ("uint16"))
+%!assert (hex2num ("ffffffff", "uint32"), intmax ("uint32"))
+%!assert (hex2num ("ffffffff", "uint32"), intmax ("uint32"))
+%!assert (hex2num ("ffffffffffffffff", "uint64"), intmax ("uint64"))
 */
 
+// Return the lowercase hexadecimal digit character for CH.  Values of
+// 10 and above are offset from 'a'; smaller values are offset from '0'.
+static inline unsigned char
+nibble2hex (unsigned char ch)
+{
+  const int offset = (ch >= 10) ? 'a' - 10 : '0';
+  const int digit = ch + offset;
+
+  return static_cast<unsigned char> (digit);
+}
+
+// Write the N bytes at P as 2*N lowercase hex digits into HEX (no
+// terminating null is stored); SWAP_BYTES reads the bytes last-to-first.
+static inline void
+num2hex (const void *p, size_t n, char *hex, bool swap_bytes)
+{
+  const unsigned char *bytes = reinterpret_cast<const unsigned char *> (p);
+
+  char *out = hex;
+
+  for (size_t i = 0; i < n; i++)
+    {
+      const unsigned char byte = bytes[swap_bytes ? n - i - 1 : i];
+
+      *out++ = nibble2hex ((byte >> 4) & 0xF);
+      *out++ = nibble2hex (byte & 0xF);
+    }
+}
+
+// Convert each element of V to a string holding the hex digits of its
+// bytes and return the strings in a Cell with the same dimensions as V.
+template <typename T>
+Cell
+num2hex (const Array<T>& v, bool swap_bytes)
+{
+  const size_t nbytes = sizeof (T);
+  const size_t nchars = 2 * nbytes;
+
+  const octave_idx_type nel = v.numel ();
+  const T *data = v.fortran_vec ();
+
+  string_vector sv (nel);
+
+  for (octave_idx_type i = 0; i < nel; i++)
+    {
+      // HEX is not null terminated, so give its length explicitly.
+      char hex[nchars];
+      num2hex (data + i, nbytes, hex, swap_bytes);
+      sv[i] = std::string (hex, nchars);
+    }
+
+  return Cell (v.dims (), sv);
+}
+
 DEFUN (num2hex, args, ,
        doc: /* -*- texinfo -*-
 @deftypefn {} {@var{s} =} num2hex (@var{n})
-Typecast a double or single precision number or vector to a 8 or 16
-character hexadecimal string of the IEEE 754 representation of the number.
+@deftypefnx {} {@var{s} =} num2hex (@var{n}, "cell")
+Convert a numeric array to an array of hexadecimal strings.
 
 For example:
 
@@ -202,94 +293,84 @@
     7f800000"
 @end group
 @end example
+
+With the optional second argument @qcode{"cell"}, return a cell array of
+strings instead of a character array.
 @seealso{hex2num, hex2dec, dec2hex}
 @end deftypefn */)
 {
-  if (args.length () != 1)
+  int nargin = args.length ();
+
+  if (nargin < 1 || nargin > 2)
     print_usage ();
 
-  if (args(0).is_complex_type ())
+  bool as_cell = false;
+
+  if (nargin == 2)
+    {
+      std::string opt = args(1).xstring_value ("num2hex: second argument must be a string");
+      if (opt == "cell")
+        as_cell = true;
+      else
+        error ("num2hex: unrecognized option '%s'", opt.c_str ());
+    }
+
+  octave_value val = args(0);
+
+  if (val.is_complex_type ())
     error ("num2hex: N must be real");
 
-  octave_value retval;
-
-  if (args(0).is_single_type ())
-    {
-      const FloatColumnVector v (args(0).float_vector_value ());
-
-      octave_idx_type nchars = 8;
-      octave_idx_type nr = v.numel ();
-      charMatrix m (nr, nchars);
-      const float *pv = v.fortran_vec ();
+  Cell result;
 
-      for (octave_idx_type i = 0; i < nr; i++)
-        {
-          union
-          {
-            uint32_t ival;
-            float dval;
-          } num;
-
-          num.dval = *pv++;
-
-          for (octave_idx_type j = 0; j < nchars; j++)
-            {
-              unsigned char ch =
-                static_cast<char>(num.ival >> ((nchars - 1 - j) * 4) & 0xF);
-              if (ch >= 10)
-                ch += 'a' - 10;
-              else
-                ch += '0';
-
-              m.elem (i, j) = ch;
-            }
-        }
+  // We always use big-endian order for hex digits.
+  bool is_float = val.is_single_type () || val.is_double_type ();
+  bool swap_bytes = is_little_endian (is_float);
 
-      retval = m;
-    }
+  if (val.is_int8_type ())
+    result = num2hex<octave_int8> (val.int8_array_value (), swap_bytes);
+  else if (val.is_int16_type ())
+    result = num2hex<octave_int16> (val.int16_array_value (), swap_bytes);
+  else if (val.is_int32_type ())
+    result = num2hex<octave_int32> (val.int32_array_value (), swap_bytes);
+  else if (val.is_int64_type ())
+    result = num2hex<octave_int64> (val.int64_array_value (), swap_bytes);
+  else if (val.is_uint8_type ())
+    result = num2hex<octave_uint8> (val.uint8_array_value (), swap_bytes);
+  else if (val.is_uint16_type ())
+    result = num2hex<octave_uint16> (val.uint16_array_value (), swap_bytes);
+  else if (val.is_uint32_type ())
+    result = num2hex<octave_uint32> (val.uint32_array_value (), swap_bytes);
+  else if (val.is_uint64_type ())
+    result = num2hex<octave_uint64> (val.uint64_array_value (), swap_bytes);
+  else if (val.is_char_matrix ())
+    result = num2hex<char> (val.char_array_value (), swap_bytes);
+  else if (val.is_single_type ())
+    result = num2hex<float> (val.float_vector_value (), swap_bytes);
+  else if (val.is_double_type ())
+    result = num2hex<double> (val.vector_value (), swap_bytes);
   else
-    {
-      const ColumnVector v (args(0).vector_value ());
-
-      octave_idx_type nchars = 16;
-      octave_idx_type nr = v.numel ();
-      charMatrix m (nr, nchars);
-      const double *pv = v.fortran_vec ();
-
-      for (octave_idx_type i = 0; i < nr; i++)
-        {
-          union
-          {
-            uint64_t ival;
-            double dval;
-          } num;
+    err_wrong_type_arg ("num2hex", val);
 
-          num.dval = *pv++;
-
-          for (octave_idx_type j = 0; j < nchars; j++)
-            {
-              unsigned char ch =
-                static_cast<char>(num.ival >> ((nchars - 1 - j) * 4) & 0xF);
-              if (ch >= 10)
-                ch += 'a' - 10;
-              else
-                ch += '0';
-
-              m.elem (i, j) = ch;
-            }
-        }
-
-      retval = m;
-    }
-
-  return retval;
+  return (as_cell
+          ? octave_value (result)
+          : octave_value (result.string_vector_value ()));
 }
 
 /*
 %!assert (num2hex (-2:2), ["c000000000000000";"bff0000000000000";"0000000000000000";"3ff0000000000000";"4000000000000000"])
 %!assert (num2hex (single (-2:2)), ["c0000000";"bf800000";"00000000";"3f800000";"40000000"])
+%!assert (num2hex (intmax ("uint8")), "ff")
+%!assert (num2hex (intmax ("uint16")), "ffff")
+%!assert (num2hex (intmax ("uint32")), "ffffffff")
+%!assert (num2hex (intmax ("uint32")), "ffffffff")
+%!assert (num2hex (intmax ("uint64")), "ffffffffffffffff")
+
+%!assert (hex2num (num2hex (pi)), pi)
+%!assert (hex2num (num2hex (single (pi)), "single"), single (pi))
 
 %!error num2hex ()
 %!error num2hex (1,2)
+%!error num2hex (1,"foo")
+%!error num2hex (1,2,3)
 %!error num2hex (1j)
 */