changeset 31848:1f3f7e874203

dec2base.m: Accept negative and fractional inputs (bug #63282) Previously only dec2bin and dec2hex accepted negative inputs but not dec2base, which led to inconsistency and repeated code. This changeset centralizes the negative input functionality in dec2base, and eliminates that code in the other functions. It also adds the ability to accept fractional inputs in dec2base. dec2base.m: Add new functionality to handle negative inputs and fractional inputs. Update BISTs. Add documentation. dec2bin.m: Remove code to handle negative inputs. Pass through to dec2base. Add code for padding the output to Matlab-compatible sizes. Update BISTs. Add documentation. dec2hex.m: Remove a matrix indexing bug. Pass through to dec2bin. Update BISTs. Add documentation. NEWS.9.md: Add note about new functionality.
author Arun Giridhar <arungiridhar@gmail.com>
date Fri, 24 Feb 2023 12:45:21 -0500
parents 318dbb0ce30d
children a098cc74d9a5
files etc/NEWS.9.md scripts/strings/dec2base.m scripts/strings/dec2bin.m scripts/strings/dec2hex.m
diffstat 4 files changed, 255 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/etc/NEWS.9.md	Fri Feb 24 09:36:30 2023 -0500
+++ b/etc/NEWS.9.md	Fri Feb 24 12:45:21 2023 -0500
@@ -9,6 +9,16 @@
 means that the file encoding specified in the `.oct-config` file for the
 respective directory is taken into account for the tests.
 
+- `dec2base`, `dec2bin`, and `dec2hex` have all been overhauled.  All three
+functions now accommodate negative inputs and fractional inputs, and repeated
+code between the functions has been reduced or eliminated.  Previously only
+`dec2bin` and `dec2hex` accepted negative inputs but `dec2base` did not, and
+none of the three accepted fractional inputs.  But now,
+`dec2base (100*pi, 16, 4, 6)` for example returns a base-16 string with four
+places for the integer part and six places for the fractional part.  Omitting
+the number of decimal places (the fourth input) retains old behavior for
+backward compatibility, except that non-integer inputs will no longer error.
+
 ### Graphical User Interface
 
 ### Graphics backend
--- a/scripts/strings/dec2base.m	Fri Feb 24 09:36:30 2023 -0500
+++ b/scripts/strings/dec2base.m	Fri Feb 24 12:45:21 2023 -0500
@@ -26,8 +26,9 @@
 ## -*- texinfo -*-
 ## @deftypefn  {} {@var{str} =} dec2base (@var{d}, @var{base})
 ## @deftypefnx {} {@var{str} =} dec2base (@var{d}, @var{base}, @var{len})
+## @deftypefnx {} {@var{str} =} dec2base (@var{d}, @var{base}, @var{len}, @var{decimals})
 ## Return a string of symbols in base @var{base} corresponding to the
-## non-negative integer @var{d}.
+## value @var{d}.
 ##
 ## @example
 ## @group
@@ -36,6 +37,10 @@
 ## @end group
 ## @end example
 ##
+## If @var{d} is negative, then the result will represent @var{d} in complement
+## notation.  For example, negative binary numbers are in twos-complement, and
+## analogously for other bases.
+##
 ## If @var{d} is a matrix or cell array, return a string matrix with one row
 ## per element in @var{d}, padded with leading zeros to the width of the
 ## largest value.
@@ -52,11 +57,38 @@
 ## @end example
 ##
 ## The optional third argument, @var{len}, specifies the minimum number of
-## digits in the result.
+## digits in the integer part of the result.  If this is omitted, then
+## @code{dec2base} uses enough digits to accommodate the input.
+##
+## The optional fourth argument, @var{decimals}, specifies the number of
+## digits to represent the fractional part of the input.  If this is omitted,
+## then it is set to zero, and @code{dec2base} returns an integer output for
+## backward compatibility.
+##
+## @example
+## @group
+## dec2base (100*pi, 16)
+## @result{} "13A"
+## dec2base (100*pi, 16, 4)
+## @result{} "013A"
+## dec2base (100*pi, 16, 4, 6)
+## @result{} "013A.28C59D"
+## dec2base (-100*pi, 16)
+## @result{} "EC6"
+## dec2base (-100*pi, 16, 4)
+## @result{} "FEC6"
+## dec2base (-100*pi, 16, 4, 6)
+## @result{} "FEC5.D73A63"
+## @end group
+## @end example
+##
+## Programming tip: When passing negative inputs to @code{dec2base}, it is
+## best to explicitly specify the length of the output required.
+##
 ## @seealso{base2dec, dec2bin, dec2hex}
 ## @end deftypefn
 
-function str = dec2base (d, base, len)
+function str = dec2base (d, base, len, decimals = 0)
 
   if (nargin < 2)
     print_usage ();
@@ -72,10 +104,23 @@
   ## Treat logical as numeric for compatibility with ML
   if (islogical (d))
     d = double (d);
-  elseif (! isnumeric (d) || iscomplex (d) || any (d < 0 | d != fix (d)))
-    error ("dec2base: input must be real non-negative integers");
+  elseif (! isnumeric (d) || iscomplex (d))
+    error ("dec2base: input must be real numbers");
   endif
 
+  ## Note which elements are negative for processing later.
+  ## Integer inputs equal to intmin also need special processing later.
+  belowlim = false(size(d));
+  if (isinteger (d))
+    belowlim = (d <= intmin(class(d)));
+  endif
+  neg = (d < 0);
+  d(neg) = -d(neg);
+
+  ## Pull out the fractional part for processing later
+  fracpart = d - floor (d);
+  d = floor (d);
+
   symbols = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
   if (ischar (base))
     symbols = base(:).';  # force a row vector
@@ -90,11 +135,10 @@
     error ("dec2base: BASE must be an integer between 2 and 36, or a string of symbols");
   endif
 
-  ## determine number of digits required to handle all numbers, can overflow
-  ## by 1 digit
+  ## Determine number of digits required to handle all numbers.
   max_len = round (log (max (max (d), 1)) / log (base)) + 1;
 
-  if (nargin == 3)
+  if (nargin >= 3)
     if (! (isscalar (len) && isreal (len) && len >= 0 && len == fix (len)))
       error ("dec2base: LEN must be a non-negative integer");
     endif
@@ -102,22 +146,91 @@
   endif
 
   ## determine digits for each number
-  digits = zeros (length (d), max_len);
+  digits = zeros (numel (d), max_len);
   for k = max_len:-1:1
     digits(:,k) = mod (d, base);
     d = round ((d - digits(:,k)) / base);
   endfor
 
-  ## convert digits to symbols
+  ## Compute any fractional part and append
+  if (nargin == 4 && decimals > 0)
+    digits2 = zeros (numel (d), decimals);
+    for k = 1:decimals
+      fracpart *= base;
+      digits2(:,k) = floor (fracpart);
+      fracpart -= floor (fracpart);
+    endfor
+  else
+    digits2 = zeros (rows (digits), 0);
+  endif
+
+  ## Handle negative inputs now
+  for k = find (neg)(:)'
+    digits(k, :) = (base-1) - digits(k, :);  # digit-wise (base-1) complement
+    if (! isempty (digits2))
+      digits2 (k, :) = (base-1) - digits2 (k, :);
+    endif
+
+    if (! isempty (digits2))
+      j = columns (digits2);
+      digits2 (k, j) += 1;  # this is a generalization of two's complement
+      while (digits2(k, j) >= base && j > 1)  # propagate carry within row k
+        digits2(k, j) -= base;
+        digits2(k, j-1) += 1;
+        j -= 1;
+      endwhile
+      if (digits2(k, 1) >= base)  # carry over to integer part
+        digits2(k, 1) -= base;
+        digits(k, end) += 1;
+      endif
+    else  # no fractional part ==> increment integer part
+      digits(k, end) += 1;
+    endif
+
+    if (belowlim (k))  # we need to handle an extra +1
+      digits(k, end) -= 1;
+      ## Reason: consider the input intmin("int64"),
+      ## which is -(2)^63 of type int64.
+      ## The code above takes its negation but that exceeds intmax("int64"),
+      ## so it's pegged back to 1 lower than what it needs to be, due to
+      ## the inherent limitation of the representation.
+      ## We add that 1 back here, but because the original sign was negative,
+      ## and we are dealing with complement notation, we subtract it instead.
+    endif
+
+    j = columns (digits);
+    while (digits(k, j) >= base && j > 1)
+      digits(k, j) -= base;
+      digits(k, j-1) += 1;
+      j -= 1;
+    endwhile
+
+    if (digits(k, 1) >= base)  # augment by one place if really needed
+      digits(k, 1) -= base;
+      digits = [zeros(rows(digits), 1), digits];
+      digits(k, 1) += 1;
+      ## FIXME Should we left-pad with zeros or with (base-1) in this context?
+    endif
+  endfor
+
+  ## Convert digits to symbols: integer part
   str = reshape (symbols(digits+1), size (digits));
 
+  ## Convert digits to symbols: fractional part
+  ## Append fractional part to str if needed.
+  if (! isempty (digits2))
+    str2 = reshape (symbols(digits2+1), size (digits2));
+    str = [str, repmat('.', rows(str), 1), str2];
+  endif
+
   ## Check if the first element is the zero symbol.  It seems possible
   ## that LEN is provided, and is less than the computed MAX_LEN and
   ## MAX_LEN is computed to be one larger than necessary, so we would
   ## have a leading zero to remove.  But if LEN >= MAX_LEN, we should
   ## not remove any leading zeros.
-  if ((nargin == 2 || (nargin == 3 && max_len > len))
-      && columns (str) != 1 && ! any (str(:,1) != symbols(1)))
+  if ((nargin == 2 || (nargin >= 3 && max_len > len))
+      && columns (str) != 1 && ! any (str(:,1) != symbols(1))
+      && (~any(neg)))
     str = str(:,2:end);
   endif
 
@@ -135,12 +248,49 @@
 %!   s0 = [s0,'0'];
 %! endfor
 
+## Test positive fractional inputs
+%!assert (dec2base (pi,  2, 0, 16), "11.0010010000111111")
+%!assert (dec2base ( e,  2, 2, 16), "10.1011011111100001")
+%!assert (dec2base (pi,  3, 0, 16), "10.0102110122220102")
+%!assert (dec2base ( e,  3, 0, 16), "2.2011011212211020")
+%!assert (dec2base (pi, 16, 0, 10), "3.243F6A8885")
+%!assert (dec2base ( e, 16, 0, 10), "2.B7E151628A")
+
+## Test negative inputs: all correct in complement notation
+%!assert (dec2base (-1,   10),        "9")
+%!assert (dec2base (-1,   10, 3),     "999")
+%!assert (dec2base (-1,   10, 3,  2), "999.00")
+%!assert (dec2base (-1.1, 10, 3,  2), "998.90")
+%!assert (dec2base (-pi,  2,  8, 16), "11111100.1101101111000001")
+%!assert (dec2base (-pi,  3,  8, 16), "22222212.2120112100002121")
+%!assert (dec2base (-pi, 16,  8, 10), "FFFFFFFC.DBC095777B")
+%!assert (dec2base ( -e,  2,  8, 16), "11111101.0100100000011111")
+%!assert (dec2base ( -e,  3,  8, 16), "22222220.0211211010011210")
+%!assert (dec2base ( -e, 16,  8, 10), "FFFFFFFD.481EAE9D76")
+
+## Test negative inputs close to powers of bases
+%!assert (dec2base (-128, 2), "10000000")
+%!assert (dec2base (-129, 2, 9), "101111111")
+%!assert (dec2base (-129, 2), "01111111")
+## FIXME: should dec2base (-129, 2) return "01111111" or "101111111"?
+## The second is an explicit 9-bit universe. The first is an implied 9-bit
+## universe but the user needs to be careful not to mistake it for +127, which
+## is true in modular arithmetic anyway (i.e., +127 == -129 in 8-bits).
+## Currently we work around this by telling the user in `help dec2base` to
+## explicitly set the lengths when working with negative numbers.
+
+## Test intmin values
+%!assert (dec2base (intmin ("int8"), 2), "10000000")
+%!assert (dec2base (intmin ("int16"), 2), "1000000000000000")
+%!assert (dec2base (intmin ("int32"), 2), "10000000000000000000000000000000")
+%!assert (dec2base (intmin ("int64"), 2), "1000000000000000000000000000000000000000000000000000000000000000")
+
 %!test
 %! digits = "0123456789ABCDEF";
 %! for n = 1:13
 %!   for b = 2:16
 %!     pm = dec2base (b^n-1, b);
-%!     assert (length (pm), n);
+%!     assert (numel (pm), n);
 %!     assert (all (pm == digits(b)));
 %!   endfor
 %! endfor
@@ -166,10 +316,8 @@
 ## Test input validation
 %!error <Invalid call> dec2base ()
 %!error <Invalid call> dec2base (1)
-%!error <input must be real non-negative integers> dec2base ("A", 10)
-%!error <input must be real non-negative integers> dec2base (2i, 10)
-%!error <input must be real non-negative integers> dec2base (-1, 10)
-%!error <input must be real non-negative integers> dec2base (1.1, 10)
+%!error <dec2base: input must be real numbers> dec2base ("A", 10)
+%!error <dec2base: input must be real numbers> dec2base (2i, 10)
 %!error <symbols representing digits must be unique> dec2base (1, "ABA")
 %!error <whitespace characters are not valid symbols> dec2base (1, "A B")
 %!error <BASE must be an integer> dec2base (1, ones (2))
--- a/scripts/strings/dec2bin.m	Fri Feb 24 09:36:30 2023 -0500
+++ b/scripts/strings/dec2bin.m	Fri Feb 24 12:45:21 2023 -0500
@@ -39,9 +39,7 @@
 ## For negative elements of @var{d}, return the binary value of the two's
 ## complement.  The result is padded with leading ones to 8, 16, 32, or 64
 ## bits as appropriate for the magnitude of the input.  Positive input
-## elements are padded with leading zeros to the same width.  If the second
-## argument @var{len} exceeds that calculated width, the result is further
-## padded with leading zeros, for compatibility with @sc{matlab}.
+## elements are padded with leading zeros to the same width.
 ##
 ## Examples:
 ##
@@ -55,12 +53,11 @@
 ## @end group
 ## @end example
 ##
-## Known @sc{matlab} Incompatibility: @sc{matlab}'s @code{dec2bin} allows
-## non-integer values for @var{d} as of Release 2022b, but is inconsistent
-## with truncation versus rounding and is also inconsistent with its own
-## @code{dec2hex} function.  For self-consistency, Octave gives an error for
-## non-integer inputs.  Users requiring compatible code for non-integer inputs
-## should make use of @code{fix} or @code{round} as appropriate.
+## Programming tip: @code{dec2bin} discards any fractional part of the input.
+## If you need the fractional part to be converted too, call @code{dec2base}
+## with a non-zero number of decimal places.  You can also use @code{fix} or
+## @code{round} on fractional inputs to ensure predictable rounding behavior.
+##
 ## @seealso{bin2dec, dec2base, dec2hex}
 ## @end deftypefn
 
@@ -73,49 +70,43 @@
   if (iscell (d))
     d = cell2mat (d);
   endif
-
-  if (! isnumeric (d) || iscomplex (d) || any (d(:) != round (d(:))))
-    error ("dec2bin: input must be integers");
-  endif
-
-  ## Create column vector for algorithm (output is always col. vector anyways)
   d = d(:);
 
-  neg = (d < 0);  # keep track of which elements are negative
-  if (any (neg))  # must be a signed type
-    ## Cast to a suitable signed integer type, then to unsigned.
-    ## Ensure that the left-most bit of the unsigned number is 1,
-    ## to signify negative input.
-    tmp = int64 (d);
-    if (all (tmp >= -128 & tmp <= 127))
-      d = int8 (d);
-      d(neg) = (d(neg) + intmax (d)) + 1;
-      d = uint8 (d);
-      d(neg) += uint8 (128);
-    elseif (all (tmp >= -32768 & tmp <= 32767))
-      d = int16 (d);
-      d(neg) = (d(neg) + intmax (d)) + 1;
-      d = uint16 (d);
-      d(neg) += uint16 (32768);
-    elseif (all (tmp >= -2147483648 & tmp <= 2147483647))
-      d = int32 (d);
-      d(neg) = (d(neg) + intmax (d)) + 1;
-      d = uint32 (d);
-      d(neg) += uint32 (2147483648);
-    else
-      d = int64 (d);
-      d(neg) = (d(neg) + intmax (d)) + 1;
-      d = uint64 (d);
-      d(neg) += uint64 (9223372036854775808);
-    endif
+  if (nargin == 1)
+    bstr = dec2base (d, 2);  # this will use a default len picked by dec2base
+  else  # nargin == 2
+    bstr = dec2base (d, 2, len);
+  endif
+
+  if (all (d >= 0))
+    return
   endif
 
-  if (nargin == 1)
-    bstr = dec2base (d, 2);
+  ## If we are here, there are negative inputs, so we need to
+  ## left-pad those outputs with ones to Matlab-compatible lengths.
+  len = columns (bstr);
+  if (all (d >= -128 & d <= 127))
+    len = max (len, 8);  # pad to 8 bits
+  elseif (all (d >= -32768 & d <= 32767))
+    len = max (len, 16);  # pad to 16 bits
+  elseif (all (d >= -2147483648 & d <= 2147483647))
+    len = max (len, 32);  # pad to 32 bits
   else
-    bstr = dec2base (d, 2, len);
+    len = max (len, 64);  # pad to 64 bits
   endif
 
+  tmp = repmat (' ', rows (bstr), len);
+  tmp(:, (end+1-columns(bstr)):end) = bstr;  # left-pad with spaces
+  bstr = tmp;
+
+  ## Change spaces to "1" for negative inputs
+  tmp = bstr(d < 0, :);
+  tmp(tmp == ' ') = '1';
+  bstr(d < 0, :) = tmp;
+
+  ## Change all other spaces to "0".
+  bstr(bstr == ' ') = '0';
+
 endfunction
 
 
@@ -129,7 +120,7 @@
 ## Test negative inputs
 %!assert (dec2bin (-3), "11111101")
 %!assert (dec2bin (-3, 3), "11111101")
-%!assert (dec2bin (-3, 9), "011111101")
+%!assert (dec2bin (-3, 9), "111111101")
 %!assert (dec2bin (-2^7 - 1), "1111111101111111")
 %!assert (dec2bin (-2^15 - 1), "11111111111111110111111111111111")
 %!assert (dec2bin (-2^31 - 1),
@@ -151,11 +142,13 @@
 %!assert (dec2bin ({1, 2; 3, -4}),
 %!        ["00000001"; "00000011"; "00000010"; "11111100"])
 
+## Test fractional inputs
+%!assert (dec2bin (+2.1), "10")
+%!assert (dec2bin (-2.1), "11111110")
+%!assert (dec2bin (+2.9), "10")
+%!assert (dec2bin (-2.9), "11111110")
+
 ## Test input validation
 %!error <Invalid call> dec2bin ()
-%!error <input must be integer> dec2bin (+2.1)
-%!error <input must be integer> dec2bin (-2.1)
-%!error <input must be integer> dec2bin (+2.9)
-%!error <input must be integer> dec2bin (-2.9)
-%!error <input must be integer> dec2bin (1+i)
+%!error <input must be real> dec2bin (1+i);
 
--- a/scripts/strings/dec2hex.m	Fri Feb 24 09:36:30 2023 -0500
+++ b/scripts/strings/dec2hex.m	Fri Feb 24 12:45:21 2023 -0500
@@ -29,8 +29,8 @@
 ## Return a string representing the conversion of the integer @var{d} to a
 ## hexadecimal (base16) number.
 ##
-## If @var{d} is negative, return the hexadecimal equivalent of the two's
-## complement binary value of @var{d}.
+## If @var{d} is negative, return the hexadecimal complement of @var{d}.
+##
 ## If @var{d} is a matrix or cell array, return a string matrix with one row
 ## for each element in @var{d}, padded with leading zeros to the width of the
 ## largest value.
@@ -50,6 +50,11 @@
 ## @end group
 ## @end example
 ##
+## Programming tip: @code{dec2hex} discards any fractional part of the input.
+## If you need the fractional part to be converted too, call @code{dec2base}
+## with a non-zero number of decimal places.  You can also use @code{fix} or
+## @code{round} on fractional inputs to ensure predictable rounding behavior.
+##
 ## @seealso{hex2dec, dec2base, dec2bin}
 ## @end deftypefn
 
@@ -59,24 +64,40 @@
     print_usage ();
   endif
 
-  ## To avoid repeating a lot of code, including input validation, we call dec2bin.
+  if (iscell (d))
+    d = cell2mat (d);
+  endif
+  d = d(:);
+
+  neg = (d < 0);
+
   if (nargin == 2)
     d = dec2bin (d, len*4);
   else
     d = dec2bin (d);
   endif
 
-  ## Left-pad with zeros to make the number of columns divisible by 4
+  ## Left-pad to a multiple of 4 columns.
   n = mod (columns (d), 4);
   if (n > 0)
-    d = [repmat("0", rows(d), 4-n), d];
+    tmp = "01"(neg + 1);  # leftpad with "0" for positive, "1" for negative
+    d = [repmat(tmp(:), 1, 4 - n), d];
   endif
 
-  d -= "0"; # convert to numeric
+  d -= '0';  # convert to numeric
   d = d(:, 1:4:end) * 8 + d(:, 2:4:end) * 4 + d(:, 3:4:end) * 2 + d(:, 4:4:end);
-  ## Elements of d are now in the range 0 to 15
+  ## Elements of d are now in the range 0 to 15.
 
-  hstr = "0123456789ABCDEF"(d+1); # convert to char and return
+  ## Convert to char matrix and return.
+  ## We used to return this in a single line:
+  ##    hstr = ("0123456789ABCDEF")(d+1);
+  ## But there are edge cases governing the sizes of row and column vectors
+  ## that cause problems with output size, so we use a loop instead.
+  hstr = repmat (' ', size (d));
+  v = "0123456789ABCDEF";
+  for t = 0:15
+    hstr(d == t) = v(t + 1);
+  endfor
 
 endfunction
 
@@ -90,7 +111,7 @@
 ## Test negative inputs
 %!assert (dec2hex (-3), "FD")
 %!assert (dec2hex (-3, 1), "FD")
-%!assert (dec2hex (-3, 3), "0FD")
+%!assert (dec2hex (-3, 3), "FFD")
 %!assert (dec2hex (-2^7 - 1), "FF7F")
 %!assert (dec2hex (-2^15 - 1), "FFFF7FFF")
 %!assert (dec2hex (-2^31 - 1), "FFFFFFFF7FFFFFFF")
@@ -103,6 +124,12 @@
 %!assert (dec2hex ([1, 2; 3, -4]), ["01"; "03"; "02"; "FC"])
 %!assert (dec2hex ({1, 2; 3, -4}), ["01"; "03"; "02"; "FC"])
 
+## Test that the output is of the correct size.
+## Next line should return a column vector:
+%!assert (dec2hex (0:15), "0123456789ABCDEF"(:))
+## Next line should return a row vector:
+%!assert (dec2hex (uint64 (18364758544493064720)), "FEDCBA9876543210")
+
 ## Test input validation
 %!error <Invalid call> dec2hex ()