diff scripts/statistics/base/histc.m @ 11436:e151e23f73bc

Overhaul base statistics functions and documentation of same. Add or improve input validation. Add input validation tests. Add functional tests. Improve or re-write documentation strings.
author Rik <octave@nomad.inbox5.com>
date Mon, 03 Jan 2011 21:23:08 -0800
parents fe3c3dfc07eb
children fd0a3ac60b0e
line wrap: on
line diff
--- a/scripts/statistics/base/histc.m	Mon Jan 03 18:36:49 2011 -0800
+++ b/scripts/statistics/base/histc.m	Mon Jan 03 21:23:08 2011 -0800
@@ -18,39 +18,38 @@
 ## <http://www.gnu.org/licenses/>.
 
 ## -*- texinfo -*-
-## @deftypefn  {Function File} {@var{n} =} histc (@var{y}, @var{edges})
-## @deftypefnx {Function File} {@var{n} =} histc (@var{y}, @var{edges}, @var{dim})
+## @deftypefn  {Function File} {@var{n} =} histc (@var{x}, @var{edges})
+## @deftypefnx {Function File} {@var{n} =} histc (@var{x}, @var{edges}, @var{dim})
 ## @deftypefnx {Function File} {[@var{n}, @var{idx}] =} histc (@dots{})
 ## Produce histogram counts.
 ##
-## When @var{y} is a vector, the function counts the number of elements of
-## @var{y} that fall in the histogram bins defined by @var{edges}.  This must be
-## a vector of monotonically non-decreasing values that define the edges of the
-## histogram bins.  So, @code{@var{n} (k)} contains the number of elements in
-## @var{y} for which @code{@var{edges} (k) <= @var{y} < @var{edges} (k+1)}.
-## The final element of @var{n} contains the number of elements of @var{y}
-## that was equal to the last element of @var{edges}.
+## When @var{x} is a vector, the function counts the number of elements of
+## @var{x} that fall in the histogram bins defined by @var{edges}.  @var{edges}
+## must be a vector of monotonically increasing values defining the edges of the
+## histogram bins.  @code{@var{n}(k)} contains the number of elements in
+## @var{x} for which @code{@var{edges}(k) <= @var{x} < @var{edges}(k+1)}.
+## The final element of @var{n} contains the number of elements of @var{x}
+## exactly equal to the last element of @var{edges}.
 ##
-## When @var{y} is a @math{N}-dimensional array, the same operation as above is
-## repeated along dimension @var{dim}.  If not specified @var{dim} defaults
+## When @var{x} is an @math{N}-dimensional array, the computation is
+## carried out along dimension @var{dim}.  If not specified, @var{dim} defaults
 ## to the first non-singleton dimension.
 ##
-## If a second output argument is requested an index matrix is also returned.
-## The @var{idx} matrix has same size as @var{y}.  Each element of @var{idx}
+## When a second output argument is requested, an index matrix is also returned.
+## The @var{idx} matrix has the same size as @var{x}.  Each element of @var{idx}
 ## contains the index of the histogram bin in which the corresponding element
-## of @var{y} was counted.
-##
+## of @var{x} was counted.
 ## @seealso{hist}
 ## @end deftypefn
 
-function [n, idx] = histc (data, edges, dim)
-  ## Check input
+function [n, idx] = histc (x, edges, dim)
+
   if (nargin < 2 || nargin > 3)
     print_usage ();
   endif
 
-  if (!isreal (data))
-    error ("histc: Y argument must be real-valued, not complex");
+  if (!isreal (x))
+    error ("histc: X argument must be real-valued, not complex");
   endif
 
   num_edges = numel (edges);
@@ -63,14 +62,14 @@
   else
     ## Make sure 'edges' is sorted
     edges = edges (:);
-    if (! issorted (edges) || edges(1) > edges(end))
+    if (!issorted (edges) || edges(1) > edges(end))
       warning ("histc: edge values not sorted on input");
       edges = sort (edges);
     endif
   endif
 
-  nd = ndims (data);
-  sz = size (data);
+  nd = ndims (x);
+  sz = size (x);
   if (nargin < 3)
     ## Find the first non-singleton dimension.
     dim = find (sz > 1, 1);
@@ -78,14 +77,14 @@
       dim = 1;
     endif
   else
-    if (!(isscalar (dim) && dim == round (dim))
+    if (!(isscalar (dim) && dim == fix (dim))
         || !(1 <= dim && dim <= nd))
       error ("histc: DIM must be an integer and a valid dimension");
     endif
   endif
 
   nsz = sz;
-  nsz (dim) = num_edges;
+  nsz(dim) = num_edges;
   
   ## the splitting point is 3 bins
 
@@ -104,22 +103,22 @@
     ## Prepare indices
     idx1 = cell (1, dim-1);
     for k = 1:length (idx1)
-      idx1 {k} = 1:sz (k);
+      idx1 {k} = 1:sz(k);
     endfor
     idx2 = cell (length (sz) - dim);
     for k = 1:length (idx2)
-      idx2 {k} = 1:sz (k+dim);
+      idx2 {k} = 1:sz(k+dim);
     endfor
     
     ## Compute the histograms
     for k = 1:num_edges-1
-      b = (edges (k) <= data & data < edges (k+1));
+      b = (edges (k) <= x & x < edges (k+1));
       n (idx1 {:}, k, idx2 {:}) = sum (b, dim);
       if (nargout > 1)
         idx (b) = k;
       endif
     endfor
-    b = (data == edges (end));
+    b = (x == edges (end));
     n (idx1 {:}, num_edges, idx2 {:}) = sum (b, dim);
     if (nargout > 1)
       idx (b) = num_edges;
@@ -130,48 +129,50 @@
     ## This is the O(M*log(N) + N) algorithm.
 
     ## Look-up indices.
-    idx = lookup (edges, data);
-    ## Zero invalid ones (including NaNs). data < edges(1) are already zero. 
-    idx(! (data <= edges(end))) = 0;
+    idx = lookup (edges, x);
+    ## Zero out-of-range indices (including NaNs).  x < edges(1) is already zero.
+    idx(! (x <= edges(end))) = 0;
 
-    ## Don't accumulate the histogram if not needed. In that case,
-    ## histc() is just a (Matlab-compatible) wrapper for lookup.
-    if (isargout (1))
-      iidx = idx;
+    iidx = idx;
 
-      ## In case of matrix input, we adjust the indices.
-      if (! isvector (data))
-        nl = prod (sz(1:dim-1));
-        nn = sz(dim);
-        nu = prod (sz(dim+1:end));
-        if (nl != 1)
-          iidx = (iidx-1) * nl;
-          iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz);
-        endif
-        if (nu != 1)
-          ne =length (edges);
-          iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz);
-        endif
+    ## In case of matrix input, we adjust the indices.
+    if (! isvector (x))
+      nl = prod (sz(1:dim-1));
+      nn = sz(dim);
+      nu = prod (sz(dim+1:end));
+      if (nl != 1)
+        iidx = (iidx-1) * nl;
+        iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz);
       endif
-
-      ## Select valid elements.
-      iidx = iidx(idx != 0);
+      if (nu != 1)
+        ne =length (edges);
+        iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz);
+      endif
+    endif
 
-      ## Call accumarray to sum the indexed elements.
-      n = accumarray (iidx(:), 1, nsz);
-    endif
+    ## Select valid elements.
+    iidx = iidx(idx != 0);
+
+    ## Call accumarray to sum the indexed elements.
+    n = accumarray (iidx(:), 1, nsz);
 
   endif
 
 endfunction
 
 %!test
-%! data = linspace (0, 10, 1001);
-%! n = histc (data, 0:10);
+%! x = linspace (0, 10, 1001);
+%! n = histc (x, 0:10);
 %! assert (n, [repmat(100, 1, 10), 1]);
 
 %!test
-%! data = repmat (linspace (0, 10, 1001), [2, 1, 3]);
-%! n = histc (data, 0:10, 2);
+%! x = repmat (linspace (0, 10, 1001), [2, 1, 3]);
+%! n = histc (x, 0:10, 2);
 %! assert (n, repmat ([repmat(100, 1, 10), 1], [2, 1, 3]));
 
+%!error histc ();
+%!error histc (1);
+%!error histc (1, 2, 3, 4);
+%!error histc ([1:10 1+i], 2);
+%!error histc (1:10, []);
+%!error histc (1, 1, 3);