Mercurial > octave-nkf
diff scripts/statistics/base/histc.m @ 11436:e151e23f73bc
Overhaul base statistics functions and documentation of same.
Add or improve input validation.
Add input validation tests.
Add functional tests.
Improve or re-write documentation strings.
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Mon, 03 Jan 2011 21:23:08 -0800 |
parents | fe3c3dfc07eb |
children | fd0a3ac60b0e |
line wrap: on
line diff
--- a/scripts/statistics/base/histc.m Mon Jan 03 18:36:49 2011 -0800 +++ b/scripts/statistics/base/histc.m Mon Jan 03 21:23:08 2011 -0800 @@ -18,39 +18,38 @@ ## <http://www.gnu.org/licenses/>. ## -*- texinfo -*- -## @deftypefn {Function File} {@var{n} =} histc (@var{y}, @var{edges}) -## @deftypefnx {Function File} {@var{n} =} histc (@var{y}, @var{edges}, @var{dim}) +## @deftypefn {Function File} {@var{n} =} histc (@var{x}, @var{edges}) +## @deftypefnx {Function File} {@var{n} =} histc (@var{x}, @var{edges}, @var{dim}) ## @deftypefnx {Function File} {[@var{n}, @var{idx}] =} histc (@dots{}) ## Produce histogram counts. ## -## When @var{y} is a vector, the function counts the number of elements of -## @var{y} that fall in the histogram bins defined by @var{edges}. This must be -## a vector of monotonically non-decreasing values that define the edges of the -## histogram bins. So, @code{@var{n} (k)} contains the number of elements in -## @var{y} for which @code{@var{edges} (k) <= @var{y} < @var{edges} (k+1)}. -## The final element of @var{n} contains the number of elements of @var{y} -## that was equal to the last element of @var{edges}. +## When @var{x} is a vector, the function counts the number of elements of +## @var{x} that fall in the histogram bins defined by @var{edges}. This must be +## a vector of monotonically increasing values that define the edges of the +## histogram bins. @code{@var{n}(k)} contains the number of elements in +## @var{x} for which @code{@var{edges}(k) <= @var{x} < @var{edges}(k+1)}. +## The final element of @var{n} contains the number of elements of @var{x} +## exactly equal to the last element of @var{edges}. ## -## When @var{y} is a @math{N}-dimensional array, the same operation as above is -## repeated along dimension @var{dim}. If not specified @var{dim} defaults +## When @var{x} is an @math{N}-dimensional array, the computation is +## carried out along dimension @var{dim}. If not specified @var{dim} defaults ## to the first non-singleton dimension. ## -## If a second output argument is requested an index matrix is also returned. -## The @var{idx} matrix has same size as @var{y}. Each element of @var{idx} +## When a second output argument is requested an index matrix is also returned. +## The @var{idx} matrix has the same size as @var{x}. Each element of @var{idx} ## contains the index of the histogram bin in which the corresponding element -## of @var{y} was counted. -## +## of @var{x} was counted. ## @seealso{hist} ## @end deftypefn -function [n, idx] = histc (data, edges, dim) - ## Check input +function [n, idx] = histc (x, edges, dim) + if (nargin < 2 || nargin > 3) print_usage (); endif - if (!isreal (data)) - error ("histc: Y argument must be real-valued, not complex"); + if (!isreal (x)) + error ("histc: X argument must be real-valued, not complex"); endif num_edges = numel (edges); @@ -63,14 +62,14 @@ else ## Make sure 'edges' is sorted edges = edges (:); - if (! issorted (edges) || edges(1) > edges(end)) + if (!issorted (edges) || edges(1) > edges(end)) warning ("histc: edge values not sorted on input"); edges = sort (edges); endif endif - nd = ndims (data); - sz = size (data); + nd = ndims (x); + sz = size (x); if (nargin < 3) ## Find the first non-singleton dimension. dim = find (sz > 1, 1); @@ -78,14 +77,14 @@ dim = 1; endif else - if (!(isscalar (dim) && dim == round (dim)) + if (!(isscalar (dim) && dim == fix (dim)) || !(1 <= dim && dim <= nd)) error ("histc: DIM must be an integer and a valid dimension"); endif endif nsz = sz; - nsz (dim) = num_edges; + nsz(dim) = num_edges; ## the splitting point is 3 bins @@ -104,22 +103,22 @@ ## Prepare indices idx1 = cell (1, dim-1); for k = 1:length (idx1) - idx1 {k} = 1:sz (k); + idx1 {k} = 1:sz(k); endfor idx2 = cell (length (sz) - dim); for k = 1:length (idx2) - idx2 {k} = 1:sz (k+dim); + idx2 {k} = 1:sz(k+dim); endfor ## Compute the histograms for k = 1:num_edges-1 - b = (edges (k) <= data & data < edges (k+1)); + b = (edges (k) <= x & x < edges (k+1)); n (idx1 {:}, k, idx2 {:}) = sum (b, dim); if (nargout > 1) idx (b) = k; endif endfor - b = (data == edges (end)); + b = (x == edges (end)); n (idx1 {:}, num_edges, idx2 {:}) = sum (b, dim); if (nargout > 1) idx (b) = num_edges; @@ -130,48 +129,50 @@ ## This is the O(M*log(N) + N) algorithm. ## Look-up indices. - idx = lookup (edges, data); - ## Zero invalid ones (including NaNs). data < edges(1) are already zero. - idx(! (data <= edges(end))) = 0; + idx = lookup (edges, x); + ## Zero invalid ones (including NaNs). x < edges(1) are already zero. + idx(! (x <= edges(end))) = 0; - ## Don't accumulate the histogram if not needed. In that case, - ## histc() is just a (Matlab-compatible) wrapper for lookup. - if (isargout (1)) - iidx = idx; + iidx = idx; - ## In case of matrix input, we adjust the indices. - if (! isvector (data)) - nl = prod (sz(1:dim-1)); - nn = sz(dim); - nu = prod (sz(dim+1:end)); - if (nl != 1) - iidx = (iidx-1) * nl; - iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz); - endif - if (nu != 1) - ne =length (edges); - iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz); - endif + ## In case of matrix input, we adjust the indices. + if (! isvector (x)) + nl = prod (sz(1:dim-1)); + nn = sz(dim); + nu = prod (sz(dim+1:end)); + if (nl != 1) + iidx = (iidx-1) * nl; + iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz); endif - - ## Select valid elements. - iidx = iidx(idx != 0); + if (nu != 1) + ne =length (edges); + iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz); + endif + endif - ## Call accumarray to sum the indexed elements. - n = accumarray (iidx(:), 1, nsz); - endif + ## Select valid elements. + iidx = iidx(idx != 0); + + ## Call accumarray to sum the indexed elements. + n = accumarray (iidx(:), 1, nsz); endif endfunction %!test -%! data = linspace (0, 10, 1001); -%! n = histc (data, 0:10); +%! x = linspace (0, 10, 1001); +%! n = histc (x, 0:10); %! assert (n, [repmat(100, 1, 10), 1]); %!test -%! data = repmat (linspace (0, 10, 1001), [2, 1, 3]); -%! n = histc (data, 0:10, 2); +%! x = repmat (linspace (0, 10, 1001), [2, 1, 3]); +%! n = histc (x, 0:10, 2); %! assert (n, repmat ([repmat(100, 1, 10), 1], [2, 1, 3])); +%!error histc (); +%!error histc (1); +%!error histc (1, 2, 3, 4); +%!error histc ([1:10 1+i], 2); +%!error histc (1:10, []); +%!error histc (1, 1, 3);