changeset 24793:1fa1869650cc

hist.m: Overhaul function (bug #53199). * hist.m: Rewrite docstring. Remove Infinite/NaN values before calculating limits of data. Use range type to preserve accuracy during calculations of bins. Truncate scalar bins to an integer value. Correctly emit an error if second input is not a scalar or a vector. Use issorted to more quickly determine whether list of bin centers is sorted. Validate normalization constant is numeric and greater than 0. Eliminate bsxfun and just use broadcasting. Reverse nargout test so that most probable code path is taken first. Use physical transpose, rather than hermitian transpose, to be clear. Add BIST test for bug #53199. Add BIST tests for input validation. Format BIST tests to use Octave coding conventions.
author Rik <rik@octave.org>
date Mon, 26 Feb 2018 08:48:43 -0800
parents 3390adaee21d
children 1cacfebaed3e
files scripts/plot/draw/hist.m
diffstat 1 files changed, 140 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/plot/draw/hist.m	Sat Feb 24 16:05:35 2018 +0100
+++ b/scripts/plot/draw/hist.m	Mon Feb 26 08:48:43 2018 -0800
@@ -18,8 +18,8 @@
 
 ## -*- texinfo -*-
 ## @deftypefn  {} {} hist (@var{y})
+## @deftypefnx {} {} hist (@var{y}, @var{nbins})
 ## @deftypefnx {} {} hist (@var{y}, @var{x})
-## @deftypefnx {} {} hist (@var{y}, @var{nbins})
 ## @deftypefnx {} {} hist (@var{y}, @var{x}, @var{norm})
 ## @deftypefnx {} {} hist (@dots{}, @var{prop}, @var{val}, @dots{})
 ## @deftypefnx {} {} hist (@var{hax}, @dots{})
@@ -28,22 +28,23 @@
 ##
 ## With one vector input argument, @var{y}, plot a histogram of the values
 ## with 10 bins.  The range of the histogram bins is determined by the
-## range of the data.  With one matrix input argument, @var{y}, plot a
-## histogram where each bin contains a bar per input column.
+## range of the data (difference between maximum and minimum value in @var{y}).
+## Extreme values are lumped into the first and last bins.  If @var{y} is a
+## matrix then plot a histogram where each bin contains one bar per input
+## column of @var{y}.
 ##
-## Given a second vector argument, @var{x}, use that as the centers of
-## the bins, with the width of the bins determined from the adjacent
-## values in the vector.
+## If the optional second argument is a scalar, @var{nbins}, it defines the
+## number of bins.
 ##
-## If scalar, the second argument, @var{nbins}, defines the number of bins.
+## If the optional second argument is a vector, @var{x}, it defines the centers
+## of the bins.  The width of the bins is determined from the adjacent values
+## in the vector.  The total number of bins is @code{numel (@var{x})}.
 ##
 ## If a third argument is provided, the histogram is normalized such that
 ## the sum of the bars is equal to @var{norm}.
 ##
-## Extreme values are lumped into the first and last bins.
-##
 ## The histogram's appearance may be modified by specifying property/value
-## pairs.  For example the face and edge color may be modified.
+## pairs.  For example, the face and edge color may be modified:
 ##
 ## @example
 ## @group
@@ -64,9 +65,9 @@
 ## If the first argument @var{hax} is an axes handle, then plot into this axes,
 ## rather than the current axes returned by @code{gca}.
 ##
-## With two output arguments, produce the values @var{nn} (numbers of elements)
-## and @var{xx} (bin centers) such that @code{bar (@var{xx}, @var{nn})} will
-## plot the histogram.
+## If an output is requested then no plot is made.  Instead, return the values
+## @var{nn} (numbers of elements) and @var{xx} (bin centers) such that
+## @code{bar (@var{xx}, @var{nn})} will plot the histogram.
 ##
 ## @seealso{histc, bar, pie, rose}
 ## @end deftypefn
@@ -81,61 +82,77 @@
     print_usage ();
   endif
 
-  y = varargin{1};
-  varargin = varargin(2:end);
+  ## Process Y argument
+  iarg = 1;
+  y = varargin{iarg++};
+
+  if (! isreal (y))
+    error ("hist: Y must be real-valued");
+  endif
 
   arg_is_vector = isvector (y);
-
   if (arg_is_vector)
     y = y(:);
   endif
 
-  if (! isreal (y))
-    error ("hist: Y must be real valued");
-  endif
-
-  max_val = max (y(:));
-  min_val = min (y(:));
+  yfinite = y(isfinite (y))(:);
+  max_val = max (yfinite);
+  min_val = min (yfinite);
   ## Do not convert if input is of class single (or if already is double).
   if (! isfloat (y))
     max_val = double (max_val);
     min_val = double (min_val);
   endif
 
-  iarg = 1;
+  ## Process possible second argument
   if (nargin == 1 || ischar (varargin{iarg}))
     n = 10;
-    x = [0.5:n]'/n;
-    x = (max_val - min_val) * x + min_val * ones (size (x));
+    ## Use range type to preserve accuracy
+    x = (0.5:n) * (1/n);
+    x = (max_val - min_val) * x + min_val;
+    x = x.';  # Convert to matrix;
   else
-    ## nargin is either 2 or 3
+    ## Parse bin specification argument
     x = varargin{iarg++};
+    if (! isreal (x))
+      error ("hist: bin specification must be a numeric scalar or vector");
+    endif
+
     ## Do not convert if input is of class single (or if already is double).
     if (! isfloat (x))
       x = double (x);
     endif
 
     if (isscalar (x))
-      n = x;
+      n = fix (x);
       if (n <= 0)
         error ("hist: number of bins NBINS must be positive");
       endif
-      x = [0.5:n]'/n;
-      x = (max_val - min_val) * x  + min_val * ones (size (x));
-    elseif (isreal (x))
-      if (isvector (x))
-        x = x(:);
-      endif
-      xsort = sort (x);
-      if (any (xsort != x))
-        warning ("hist: bin values not sorted on input");
-        x = xsort;
+      ## Use range type to preserve accuracy
+      x = (0.5:n) * (1/n);
+      x = (max_val - min_val) * x + min_val;
+      x = x.';  # Convert to matrix;
+    elseif (isvector (x))
+      x = x(:);
+      if (! issorted (x))
+        warning ("hist: bin values X not sorted on input");
+        x = sort (x);
       endif
     else
-      error ("hist: second argument must be a scalar or a vector");
+      error ("hist: bin specification must be a scalar or vector");
     endif
   endif
 
+  ## Check for third argument (normalization)
+  norm = false;
+  if (nargin > 2 && ! ischar (varargin{iarg}))
+    norm = varargin{iarg++};
+    if (! isnumeric (norm) || ! all (norm > 0))
+      error ("hist: NORM must be a numeric constant > 0");
+    endif
+  endif
+
+  ## Perform histogram calculation
   cutoff = (x(1:end-1,:) + x(2:end,:)) / 2;
   if (isinteger (y))
     cutoff = floor (cutoff);
@@ -163,26 +180,25 @@
 
   freq = diff (chist);
 
-  if (nargin > 2 && ! ischar (varargin{iarg}))
-    ## Normalize the histogram.
-    norm = varargin{iarg++};
-    freq = bsxfun (@times, freq, norm ./ sum (! isnan (y)));
+  if (norm)
+    ## Normalize the histogram
+    freq = freq .* norm ./ sum (! isnan (y));
   endif
 
-  if (nargout > 0)
+  if (nargout == 0)
+    if (isempty (hax))
+      hax = gca ();
+    endif
+    bar (hax, x, freq, "hist", varargin{iarg:end});
+  else
     if (arg_is_vector)
       ## Matlab compatibility requires a row vector return
-      nn = freq';
-      xx = x';
+      nn = freq.';
+      xx = x.';
     else
       nn = freq;
       xx = x;
     endif
-  else
-    if (isempty (hax))
-      hax = gca ();
-    endif
-    bar (hax, x, freq, "hist", varargin{iarg:end});
   endif
 
 endfunction
@@ -197,17 +213,17 @@
 %! assert (xx, [1.5,2.5,3.5]);
 %! assert (nn, [2,1,1]);
 %!test
-%! [nn,xx] = hist ([1 1 1 NaN NaN NaN 2 2 3],[1 2 3]);
+%! [nn,xx] = hist ([1 1 1 NaN NaN NaN 2 2 3], [1, 2, 3]);
 %! assert (xx, [1,2,3]);
 %! assert (nn, [3,2,1]);
 %!test
-%! [nn,xx] = hist ([1 1 1 NaN NaN NaN 2 2 3],[1 2 3], 6);
+%! [nn,xx] = hist ([1 1 1 NaN NaN NaN 2 2 3], [1, 2, 3], 6);
 %! assert (xx, [1,2,3]);
 %! assert (nn, [3,2,1]);
-%!test
+%!test  # Multiple columns
 %! [nn,xx] = hist ([[1:4]', [1:4]'], 3);
 %! assert (xx, [1.5;2.5;3.5]);
-%! assert (nn, [[2,1,1]',[2,1,1]']);
+%! assert (nn, [[2,1,1]', [2,1,1]']);
 %!test
 %! for n = [10, 30, 100, 1000]
 %!   assert (sum (hist ([1:n], n)), n);
@@ -217,7 +233,7 @@
 %!   assert (sum (hist ([1:n], 30)), n);
 %! endfor
 %!assert (hist (1,1), 1)
-%!assert (size (hist (randn (750,240), 200)), [200,240])
+%!assert (size (hist (randn (750,240), 200)), [200, 240])
 %!assert <*42394> (isempty (hist (rand (10,2), 0:5, 1)), false)
 %!assert <*42394> (isempty (hist (rand (10,2), 0:5, [1 1])), false)
 
@@ -226,55 +242,102 @@
 %!      2  3  1  1  6  5  5  3  9  9  1  1  8  7  7  2  4  1];
 %! [n, x] = hist (y, 10);
 %! [nn, xx] = hist (uint8 (y), 10);
-%! assert (nn, n)
-%! assert (xx, x)
+%! assert (nn, n);
+%! assert (xx, x);
 %!
 %! ## test again with N > 30 because there's a special case for it
 %! [n, x] = hist (y, 35);
 %! [nn, xx] = hist (uint8 (y), 35);
-%! assert (nn, n)
-%! assert (xx, x)
+%! assert (nn, n);
+%! assert (xx, x);
 
 ## Test logical input
 %!test
 %! y = [0  1  0  0  1  0  1  1  0  1  0  0  0  0  0  0  1  0];
 %! [n, x] = hist (y, 10);
 %! [nn, xx] = hist (logical (y), 10);
-%! assert (nn, n)
-%! assert (xx, x)
+%! assert (nn, n);
+%! assert (xx, x);
 %!
 %! ## test again with N > 30 because there's a special case for it
 %! [n, x] = hist (y, 35);
 %! [nn, xx] = hist (logical (y), 35);
-%! assert (nn, n)
-%! assert (xx, x)
+%! assert (nn, n);
+%! assert (xx, x);
 
 ## Second output argument must be of class single if data is class single.
 %!assert (class (nthargout (2, @hist, rand (10, 1, "single"))), "single")
 
 ## Handle second argument correctly, even when it's class integer
 %!test
-%! y = [2.4 2.5 2.6 5.4 5.5 5.6];
-%! n = [2  3  1];
-%! x = [1  4  7];
+%! y = [2.4, 2.5, 2.6, 5.4, 5.5, 5.6];
+%! n = [2, 3, 1];
+%! x = [1, 4, 7];
 %! [nn, xx] = hist (y, uint8 ([1 4 7]));
-%! assert (nn, n)
-%! assert (xx, x)
+%! assert (nn, n);
+%! assert (xx, x);
 
+## Test bin centers
 %!test
-%! y = [2.4 2.5 2.6 5.4 5.5 5.6];
+%! y = [2.4, 2.5, 2.6, 5.4, 5.5, 5.6];
 %! s = (5.6 - 2.4) / 6;
-%! x = [2.4+s 4.0 5.6-s];
-%! n = [3 0 3];
+%! x = [2.4+s, 4.0, 5.6-s];
+%! n = [3, 0, 3];
 %!
 %! [nn, xx] = hist (y, 3);
-%! assert (nn, n)
-%! assert (xx, x)
+%! assert (nn, n);
+%! assert (xx, x, 2*eps);
 %!
 %! [nn, xx] = hist (y, uint8 (3));
-%! assert (nn, n)
-%! assert (xx, x)
+%! assert (nn, n);
+%! assert (xx, x, 2*eps);
 %!
 %! [nn, xx] = hist (y, single (3));
-%! assert (nn, n)
-%! assert (xx, single (x))
+%! assert (nn, n);
+%! assert (xx, single (x), 2*eps ("single"));
+
+%!test <*53199>
+%! a = [  1,  2,  3,  4, 0;
+%!        5,  4,  6,  7, 8;
+%!        9, 12, 11, 10, 0;
+%!       13, 16, 15, 14, 0;
+%!       17, 20, 19, 18, 0;
+%!       21, 22, 23,  2, 0;
+%!       24, 27, 26, 25, 0;
+%!       28, 31, 30, 29, 0;
+%!       32, 35, 34, 33, 0;
+%!       36, 39, 38, 37, 0;
+%!       40, 43, 42, 41, 0;
+%!       44, 47, 46, 45, 0;
+%!       48, 51, 50, 49, 0;
+%!       52, 55, 54, 53, 0];
+%! n = max (a(:));
+%! [cnt1, ctr1] = hist(a(:), 1:n);
+%! [cnt2, ctr2] = hist(a(:), n);
+%! assert (cnt1, cnt2);
+%! assert (ctr1, 1:n);
+%! assert (ctr2, 0.5:n);
+
+## Test Infinite values and calculation of bins
+%!test
+%! y = [-Inf, NaN, 10, Inf, 0];
+%! [nn, xx] = hist (y);
+%! assert (nn, [2 0 0 0 0 0 0 0 0 2]);
+%! assert (xx, 0.5:10);
+
+## Test input validation
+%!error hist ()
+%!error <Y must be real-valued> hist (2+i)
+%!error <bin specification must be a numeric> hist (1, {0,1,2})
+%!error <number of bins NBINS must be positive> hist (1, 0)
+%!test
+%! hf = figure ("visible", "off");
+%! hax = gca;
+%! unwind_protect
+%!   fail ("hist (hax, 1, [2 1 0])", "warning", "bin values X not sorted"); 
+%! unwind_protect_cleanup
+%!   close (hf);
+%! end_unwind_protect
+%!error <bin specification must be a scalar or vector> hist (1, ones (2,2))
+%!error <NORM must be a numeric constant> hist (1,1, {1})
+%!error <NORM must be a numeric constant . 0> hist (1,1, -1)