Mercurial > octave
changeset 23356:ef20eee0247d
allow hex2num to handle integer values
* hex2num.cc: Handle integer types. Simplify using templates and
generic functions. Allow num2hex to return a cell array. Ensure hex
digits are always created and processed in big-endian order.
New tests.
* NEWS: Note changes.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Thu, 06 Apr 2017 13:18:51 -0400 |
parents | 08db4e9dd149 |
children | 426b593b4b6b |
files | NEWS libinterp/corefcn/hex2num.cc |
diffstat | 2 files changed, 259 insertions(+), 171 deletions(-) [+] |
line wrap: on
line diff
--- a/NEWS Thu Apr 06 13:12:09 2017 -0400 +++ b/NEWS Thu Apr 06 13:18:51 2017 -0400 @@ -19,6 +19,13 @@ storing and retrieving values by name, rather than by position which is how arrays work. + ** hex2num and num2hex now work for integer and char types and num2hex + may optionally return a cell array of strings instead of a character + array. If given a cell array of strings, hex2num now returns a + numeric array of the same size as the input cell array. Previously, + hex2num would accept a cell array of strings of arbitrary dimension + but would always return a column vector. + ** The "names" option used in regular expressions now returns a struct array, rather than a struct with a cell array for each field. This change was made for Matlab compatibility.
--- a/libinterp/corefcn/hex2num.cc Thu Apr 06 13:12:09 2017 -0400 +++ b/libinterp/corefcn/hex2num.cc Thu Apr 06 13:18:51 2017 -0400 @@ -1,5 +1,6 @@ /* +Copyright (C) 2017 John W. Eaton Copyright (C) 2008-2017 David Bateman This file is part of Octave. @@ -24,23 +25,96 @@ # include "config.h" #endif -#include <algorithm> - #include "defun.h" #include "error.h" #include "errwarn.h" +#include "mach-info.h" +#include "ov.h" #include "ovl.h" #include "utils.h" +static inline bool +is_little_endian (bool is_float) +{ + return ((is_float && (octave::mach_info::native_float_format () + == octave::mach_info::flt_fmt_ieee_little_endian)) + || octave::mach_info::words_little_endian ()); +} + +static uint8_t +hex2nibble (unsigned char ch) +{ + unsigned char val = 0; + + if (! isxdigit (ch)) + error ("hex2num: invalid character '%c' found in string S", ch); + + if (ch >= 'a') + val = static_cast<unsigned char> (ch - 'a' + 10); + else if (ch >= 'A') + val = static_cast<unsigned char> (ch - 'A' + 10); + else + val = static_cast<unsigned char> (ch - '0'); + + return val; +} + +static void +hex2num (const std::string& hex, void *num, size_t nbytes, bool swap_bytes) +{ + unsigned char *cp = reinterpret_cast<unsigned char *> (num); + + const size_t nc = hex.length (); + const size_t nchars = 2 * nbytes; + + if (nc > nchars) + error ("hex2num: S must be no more than %d characters", nchars); + + size_t j = 0; + + for (size_t i = 0; i < nbytes; i++) + { + size_t k = swap_bytes ? nbytes - i - 1 : i; + + unsigned char ch1 = (j < nc) ? hex[j++] : '0'; + unsigned char ch2 = (j < nc) ? hex[j++] : '0'; + + cp[k] = (hex2nibble (ch1) << 4) + hex2nibble (ch2); + } +} + +template <typename T> +Array<T> +hex2num (const Array<std::string>& val, bool swap_bytes) +{ + octave_idx_type nel = val.numel (); + + Array<T> m (val.dims ()); + + size_t nbytes = sizeof (T); + + for (octave_idx_type i = 0; i < nel; i++) + { + T num; + + hex2num (val.xelem (i), &num, nbytes, swap_bytes); + + m(i) = num; + } + + return m; +} + DEFUN (hex2num, args, , doc: /* -*- texinfo -*- @deftypefn {} {@var{n} =} hex2num (@var{s}) @deftypefnx {} {@var{n} =} hex2num (@var{s}, @var{class}) -Typecast the 16 character hexadecimal character string to an IEEE 754 -double precision number. +Typecast a hexadecimal character array or cell array of strings to an +array of numbers. -If fewer than 16 characters are given the strings are right padded with -@qcode{'0'} characters. +By default, the input array is interpreted as a hexadecimal number +representing a double precision value. If fewer than 16 characters are +given the strings are right padded with @qcode{'0'} characters. Given a string matrix, @code{hex2num} treats each row as a separate number. @@ -51,10 +125,25 @@ @end group @end example -The optional argument @var{class} can be passed as the string -@qcode{"single"} to specify that the given string should be interpreted as -a single precision number. In this case, @var{s} should be an 8 character -hexadecimal string. For example: +The optional second argument @var{class} may be used to cause the input +array to be interpreted as a different value type. Possible values are + +@multitable {Option} {Characters} +@headitem Option @tab Characters +@item @qcode{"int8"} @tab 2 +@item @qcode{"uint8"} @tab 2 +@item @qcode{"int16"} @tab 4 +@item @qcode{"uint16"} @tab 4 +@item @qcode{"int32"} @tab 8 +@item @qcode{"uint32"} @tab 8 +@item @qcode{"int64"} @tab 16 +@item @qcode{"uint64"} @tab 16 +@item @qcode{"char"} @tab 2 +@item @qcode{"single"} @tab 8 +@item @qcode{"double"} @tab 16 +@end multitable + +For example: @example @group @@ -65,104 +154,47 @@ @seealso{num2hex, hex2dec, dec2hex} @end deftypefn */) { + octave_value retval; + int nargin = args.length (); if (nargin < 1 || nargin > 2) print_usage (); - if (nargin == 2 && ! args(1).is_string ()) - error ("hex2num: CLASS must be a string"); - - const charMatrix cmat = args(0).char_matrix_value (); - std::string prec = (nargin == 2) ? args(1).string_value () : "double"; - bool is_single = (prec == "single"); - octave_idx_type nchars = (is_single) ? 8 : 16; - - if (cmat.columns () > nchars) - error ("hex2num: S must be no more than %d characters", nchars); - else if (prec != "double" && prec != "single") - error ("hex2num: CLASS must be either \"double\" or \"single\""); - - octave_value retval; - octave_idx_type nr = cmat.rows (); - octave_idx_type nc = cmat.columns (); - - if (is_single) - { - FloatColumnVector m (nr); + std::string type = "double"; + if (nargin == 2) + type = args(1).xstring_value ("hex2num: CLASS must be a string"); - for (octave_idx_type i = 0; i < nr; i++) - { - union - { - uint32_t ival; - float dval; - } num; - - num.ival = 0; + Array<std::string> val = args(0).cellstr_value (); - for (octave_idx_type j = 0; j < nc; j++) - { - unsigned char ch = cmat.elem (i, j); - - if (! isxdigit (ch)) - error ("hex2num: illegal character found in string S"); - - num.ival <<= 4; - if (ch >= 'a') - num.ival += static_cast<uint32_t> (ch - 'a' + 10); - else if (ch >= 'A') - num.ival += static_cast<uint32_t> (ch - 'A' + 10); - else - num.ival += static_cast<uint32_t> (ch - '0'); - } + // We always use big-endian order for hex digits. + bool is_float = type == "single" || type == "double"; + bool swap_bytes = is_little_endian (is_float); - if (nc < nchars) - num.ival <<= (nchars - nc) * 4; - - m(i) = num.dval; - } - - retval = m; - } + if (type == "int8") + retval = octave_value (hex2num<octave_int8> (val, swap_bytes)); + else if (type == "uint8") + retval = octave_value (hex2num<octave_uint8> (val, swap_bytes)); + else if (type == "int16") + retval = octave_value (hex2num<octave_int16> (val, swap_bytes)); + else if (type == "uint16") + retval = octave_value (hex2num<octave_uint16> (val, swap_bytes)); + else if (type == "int32") + retval = octave_value (hex2num<octave_int32> (val, swap_bytes)); + else if (type == "uint32") + retval = octave_value (hex2num<octave_uint32> (val, swap_bytes)); + else if (type == "int64") + retval = octave_value (hex2num<octave_int64> (val, swap_bytes)); + else if (type == "uint64") + retval = octave_value (hex2num<octave_uint64> (val, swap_bytes)); + else if (type == "char") + retval = octave_value (hex2num<char> (val, swap_bytes)); + else if (type == "single") + retval = octave_value (hex2num<float> (val, swap_bytes)); + else if (type == "double") + retval = octave_value (hex2num<double> (val, swap_bytes)); else - { - ColumnVector m (nr); - - for (octave_idx_type i = 0; i < nr; i++) - { - union - { - uint64_t ival; - double dval; - } num; - - num.ival = 0; - - for (octave_idx_type j = 0; j < nc; j++) - { - unsigned char ch = cmat.elem (i, j); - - if (! isxdigit (ch)) - error ("hex2num: illegal character found in string S"); - - num.ival <<= 4; - if (ch >= 'a') - num.ival += static_cast<uint64_t> (ch - 'a' + 10); - else if (ch >= 'A') - num.ival += static_cast<uint64_t> (ch - 'A' + 10); - else - num.ival += static_cast<uint64_t> (ch - '0'); - } - - if (nc < nchars) - num.ival <<= (nchars - nc) * 4; - - m(i) = num.dval; - } - - retval = m; - } + error ("hex2num: unrecognized CLASS '%s'", type); return retval; } @@ -170,13 +202,72 @@ /* %!assert (hex2num (["c00";"bff";"000";"3ff";"400"]), [-2:2]') %!assert (hex2num (["c00";"bf8";"000";"3f8";"400"], "single"), single([-2:2])') +%!assert (hex2num ("ff", "uint8"), intmax ("uint8")) +%!assert (hex2num ("ffff", "uint16"), intmax ("uint16")) +%!assert (hex2num ("ffffffff", "uint32"), intmax ("uint32")) +%!assert (hex2num ("ffffffff", "uint32"), intmax ("uint32")) +%!assert (hex2num ("ffffffffffffffff", "uint64"), intmax ("uint64")) */ +static inline unsigned char +nibble2hex (unsigned char ch) +{ + if (ch >= 10) + ch += 'a' - 10; + else + ch += '0'; + + return ch; +} + +static inline void +num2hex (const void *p, size_t n, char *hex, bool swap_bytes) +{ + const unsigned char *cp = reinterpret_cast<const unsigned char *> (p); + + size_t k = 0; + + for (size_t i = 0; i < n; i++) + { + size_t j = swap_bytes ? n - i - 1 : i; + + unsigned char ch = cp[j]; + + hex[k++] = nibble2hex ((ch >> 4) & 0xF); + hex[k++] = nibble2hex (ch & 0xF); + } +} + +template <typename T> +Cell +num2hex (const Array<T>& v, bool swap_bytes) +{ + const size_t nbytes = sizeof (T); + const size_t nchars = 2 * nbytes; + + octave_idx_type nel = v.numel (); + + string_vector sv (nel); + + const T *pv = v.fortran_vec (); + + for (octave_idx_type i = 0; i < nel; i++) + { + char hex[nchars]; + + num2hex (pv++, nbytes, hex, swap_bytes); + + sv[i] = std::string (hex, nchars); + } + + return Cell (v.dims (), sv); +} + DEFUN (num2hex, args, , doc: /* -*- texinfo -*- @deftypefn {} {@var{s} =} num2hex (@var{n}) -Typecast a double or single precision number or vector to a 8 or 16 -character hexadecimal string of the IEEE 754 representation of the number. +@deftypefnx {} {@var{s} =} num2hex (@var{n}, "cell") +Convert a numeric array to an array of hexadecimal strings. For example: @@ -202,94 +293,84 @@ 7f800000" @end group @end example + +With the optional second argument @qcode{"cell}, return a cell array of +strings instead of a character array. @seealso{hex2num, hex2dec, dec2hex} @end deftypefn */) { - if (args.length () != 1) + int nargin = args.length (); + + if (nargin < 1 || nargin > 2) print_usage (); - if (args(0).is_complex_type ()) + bool as_cell = false; + + if (nargin == 2) + { + std::string opt = args(1).xstring_value ("num2hex: second argument must be a string"); + if (opt == "cell") + as_cell = true; + else + error ("num2hex: unrecognized option '%s'", opt.c_str ()); + } + + octave_value val = args(0); + + if (val.is_complex_type ()) error ("num2hex: N must be real"); - octave_value retval; - - if (args(0).is_single_type ()) - { - const FloatColumnVector v (args(0).float_vector_value ()); - - octave_idx_type nchars = 8; - octave_idx_type nr = v.numel (); - charMatrix m (nr, nchars); - const float *pv = v.fortran_vec (); + Cell result; - for (octave_idx_type i = 0; i < nr; i++) - { - union - { - uint32_t ival; - float dval; - } num; - - num.dval = *pv++; - - for (octave_idx_type j = 0; j < nchars; j++) - { - unsigned char ch = - static_cast<char>(num.ival >> ((nchars - 1 - j) * 4) & 0xF); - if (ch >= 10) - ch += 'a' - 10; - else - ch += '0'; - - m.elem (i, j) = ch; - } - } + // We always use big-endian order for hex digits. + bool is_float = val.is_single_type () || val.is_double_type (); + bool swap_bytes = is_little_endian (is_float); - retval = m; - } + if (val.is_int8_type ()) + result = num2hex (val.int8_array_value (), swap_bytes); + else if (val.is_int16_type ()) + result = num2hex<octave_int16> (val.int16_array_value (), swap_bytes); + else if (val.is_int32_type ()) + result = num2hex<octave_int32> (val.int32_array_value (), swap_bytes); + else if (val.is_int64_type ()) + result = num2hex<octave_int64> (val.int64_array_value (), swap_bytes); + else if (val.is_uint8_type ()) + result = num2hex<octave_uint8> (val.uint8_array_value (), swap_bytes); + else if (val.is_uint16_type ()) + result = num2hex<octave_uint16> (val.uint16_array_value (), swap_bytes); + else if (val.is_uint32_type ()) + result = num2hex<octave_uint32> (val.uint32_array_value (), swap_bytes); + else if (val.is_uint64_type ()) + result = num2hex<octave_uint64> (val.uint64_array_value (), swap_bytes); + else if (val.is_char_matrix ()) + result = num2hex<char> (val.char_array_value (), swap_bytes); + else if (val.is_single_type ()) + result = num2hex<float> (val.float_vector_value (), swap_bytes); + else if (val.is_double_type ()) + result = num2hex<double> (val.vector_value (), swap_bytes); else - { - const ColumnVector v (args(0).vector_value ()); - - octave_idx_type nchars = 16; - octave_idx_type nr = v.numel (); - charMatrix m (nr, nchars); - const double *pv = v.fortran_vec (); - - for (octave_idx_type i = 0; i < nr; i++) - { - union - { - uint64_t ival; - double dval; - } num; + err_wrong_type_arg ("num2hex", val); - num.dval = *pv++; - - for (octave_idx_type j = 0; j < nchars; j++) - { - unsigned char ch = - static_cast<char>(num.ival >> ((nchars - 1 - j) * 4) & 0xF); - if (ch >= 10) - ch += 'a' - 10; - else - ch += '0'; - - m.elem (i, j) = ch; - } - } - - retval = m; - } - - return retval; + return (as_cell + ? octave_value (result) + : octave_value (result.string_vector_value ())); } /* %!assert (num2hex (-2:2), ["c000000000000000";"bff0000000000000";"0000000000000000";"3ff0000000000000";"4000000000000000"]) %!assert (num2hex (single (-2:2)), ["c0000000";"bf800000";"00000000";"3f800000";"40000000"]) +%!assert (num2hex (intmax ("uint8")), "ff") +%!assert (num2hex (intmax ("uint16")), "ffff") +%!assert (num2hex (intmax ("uint32")), "ffffffff") +%!assert (num2hex (intmax ("uint32")), "ffffffff") +%!assert (num2hex (intmax ("uint64")), "ffffffffffffffff") + +%!assert (hex2num (num2hex (pi)), pi) +%!assert (hex2num (num2hex (single (pi)), "single"), single (pi)) %!error num2hex () %!error num2hex (1,2) +%!error num2hex (1,"foo") +%!error num2hex (1,2,3) %!error num2hex (1j) */