changeset 8932:2d0f8692a82e

Add the 'histc' function
author Soren Hauberg <hauberg@gmail.com>
date Sun, 08 Mar 2009 18:45:58 +0100
parents 92dd386f0f13
children 346fde2030b5
files ChangeLog NEWS doc/ChangeLog doc/interpreter/stats.txi scripts/ChangeLog scripts/statistics/base/Makefile.in scripts/statistics/base/histc.m
diffstat 7 files changed, 153 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sat Mar 07 22:09:25 2009 +0100
+++ b/ChangeLog	Sun Mar 08 18:45:58 2009 +0100
@@ -1,3 +1,7 @@
+2009-03-08  Søren Hauberg  <hauberg@gmail.com>
+
+	* NEWS: Mention 'histc'.
+
 2009-03-07  John W. Eaton  <jwe@octave.org>
 
 	* config.guess, config.sub: Update from FSF sources.
--- a/NEWS	Sat Mar 07 22:09:25 2009 +0100
+++ b/NEWS	Sun Mar 08 18:45:58 2009 +0100
@@ -261,23 +261,23 @@
 
  ** Other miscellaneous new functions.
 
-      addtodate          idivide                     realpow
-      bicgstab           info                        realsqrt
-      cgs                interp1q                    rectint
-      command_line_path  isdebugmode                 regexptranslate
-      contrast           isfloat                     restoredefaultpath
-      convn              isstrprop                   roundb
-      cummin             log1p                       rundemos
-      cummax             lsqnonneg                   runlength
-      datetick           matlabroot                  saveobj
-      display            namelengthmax               spaugment
-      expm1              nargoutchk                  strchr
-      filemarker         pathdef                     strvcat
-      fstat              perl                        subspace
-      full               prctile                     symvar
-      fzero              quantile                    treelayout
-      genvarname         re_read_readline_init_file  validatestring
-      hypot              reallog
+      addtodate          hypot                       reallog
+      bicgstab           idivide                     realpow
+      cgs                info                        realsqrt
+      command_line_path  interp1q                    rectint
+      contrast           isdebugmode                 regexptranslate
+      convn              isfloat                     restoredefaultpath
+      cummin             isstrprop                   roundb
+      cummax             log1p                       rundemos
+      datetick           lsqnonneg                   runlength
+      display            matlabroot                  saveobj
+      expm1              namelengthmax               spaugment
+      filemarker         nargoutchk                  strchr
+      fstat              pathdef                     strvcat
+      full               perl                        subspace
+      fzero              prctile                     symvar
+      genvarname         quantile                    treelayout
+      histc              re_read_readline_init_file  validatestring
 
  ** Changes to strcat.
 
--- a/doc/ChangeLog	Sat Mar 07 22:09:25 2009 +0100
+++ b/doc/ChangeLog	Sun Mar 08 18:45:58 2009 +0100
@@ -1,3 +1,8 @@
+2009-03-08  Søren Hauberg  <hauberg@gmail.com>
+
+	* interpreter/stats.txi (Basic Statistical Functions):
+	Add the 'histc' function.
+
 2009-03-07  John W. Eaton  <jwe@octave.org>
 
 	* interpreter/basics.txi (Command Line Options):
--- a/doc/interpreter/stats.txi	Sat Mar 07 22:09:25 2009 +0100
+++ b/doc/interpreter/stats.txi	Sun Mar 08 18:45:58 2009 +0100
@@ -100,6 +100,8 @@
 
 @DOCSTRING(nchoosek)
 
+@DOCSTRING(histc)
+
 @DOCSTRING(perms)
 
 @DOCSTRING(values)
--- a/scripts/ChangeLog	Sat Mar 07 22:09:25 2009 +0100
+++ b/scripts/ChangeLog	Sun Mar 08 18:45:58 2009 +0100
@@ -1,3 +1,7 @@
+2009-03-08  Søren Hauberg <hauberg@gmail.com>
+
+	* statistics/base/histc.m: New function.
+
 2009-03-06  Ben Abbott <bpabbott@mac.com>
 
 	* plot/__go_draw_axes__.m: Preserve the order of axes' children
--- a/scripts/statistics/base/Makefile.in	Sat Mar 07 22:09:25 2009 +0100
+++ b/scripts/statistics/base/Makefile.in	Sun Mar 08 18:45:58 2009 +0100
@@ -33,8 +33,8 @@
 INSTALL_DATA = @INSTALL_DATA@
 
 SOURCES = __quantile__.m center.m cloglog.m cor.m corrcoef.m cov.m \
-  cut.m gls.m iqr.m kendall.m kurtosis.m logit.m mahalanobis.m mean.m \
-  meansq.m median.m mode.m moment.m ols.m ppplot.m prctile.m probit.m \
+  cut.m gls.m histc.m iqr.m kendall.m kurtosis.m logit.m mahalanobis.m \
+  mean.m meansq.m median.m mode.m moment.m ols.m ppplot.m prctile.m probit.m \
   qqplot.m quantile.m range.m ranks.m run_count.m skewness.m spearman.m \
   statistics.m std.m studentize.m table.m values.m var.m
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/statistics/base/histc.m	Sun Mar 08 18:45:58 2009 +0100
@@ -0,0 +1,119 @@
+## Copyright (C) 2009, Søren Hauberg
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3, or (at your option)
+## any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, write to the Free
+## Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+## 02110-1301, USA.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {@var{n} =} histc (@var{y}, @var{edges})
+## @deftypefnx {Function File} {@var{n} =} histc (@var{y}, @var{edges}, @var{dim})
+## @deftypefnx {Function File} {[@var{n}, @var{idx}] =} histc (...)
+## Produce histogram counts.
+##
+## When @var{y} is a vector, the function counts the number of elements of
+## @var{y} that fall in the histogram bins defined by @var{edges}. This must be
+## a vector of monotonically non-decreasing values that define the edges of the
+## histogram bins. So, @code{@var{n} (k)} contains the number of elements in
+## @var{y} for which @code{@var{edges} (k) <= @var{y} < @var{edges} (k+1)}.
+## The final element of @var{n} contains the number of elements of @var{y}
+## that are equal to the last element of @var{edges}.
+##
+## When @var{y} is an @math{N}-dimensional array, the same operation as above
+## is repeated along dimension @var{dim}. If this argument is not given, the
+## operation is performed along the first non-singleton dimension.
+##
+## If a second output argument is requested, an index matrix is also returned.
+## The @var{idx} matrix has the same size as @var{y}. Each element of @var{idx}
+## contains the index of the histogram bin in which the corresponding element
+## of @var{y} was counted.
+##
+## @seealso{hist}
+## @end deftypefn
+
+function [n, idx] = histc (data, edges, dim)
+  ## Check input
+  if (nargin < 2)
+    print_usage ();
+  endif
+
+  ## Default to the first non-singleton dimension (1 for scalar input)
+  sz = size (data);
+  if (nargin < 3)
+    dim = find (sz > 1, 1);
+    if (isempty (dim))
+      dim = 1;
+    endif
+  endif
+  sz (end+1:dim) = 1;  # pad with singletons if 'dim' exceeds ndims (data)
+
+  if (!isreal (data))
+    error ("histc: first argument must be a real vector");
+  endif
+
+  ## Make sure 'edges' is a sorted column vector
+  num_edges = numel (edges);
+  if (isreal (edges))
+    edges = edges (:);
+    tmp = sort (edges);
+    if (any (tmp != edges))
+      warning ("histc: edge values not sorted on input");
+      edges = tmp;
+    endif
+  else
+    error ("histc: second argument must be a vector");
+  endif
+
+  ## Allocate the histogram: sized like 'data' except along 'dim'
+  nsz = sz;
+  nsz (dim) = num_edges;
+  n = zeros (nsz);
+
+  ## Allocate 'idx'; elements that fall in no bin keep the value 0
+  if (nargout > 1)
+    idx = zeros (sz);
+  endif
+
+  ## Subscripts for the dimensions before/after 'dim'.  Both must be row
+  ## cell arrays: cell (N) is N-by-N and would expand to N^2 subscripts.
+  idx1 = repmat ({":"}, 1, dim-1);
+  idx2 = repmat ({":"}, 1, length (sz) - dim);
+
+  ## Compute the histograms: bin k holds edges(k) <= data < edges(k+1)
+  for k = 1:num_edges-1
+    b = (edges (k) <= data & data < edges (k+1));
+    n (idx1 {:}, k, idx2 {:}) = sum (b, dim);
+    if (nargout > 1)
+      idx (b) = k;
+    endif
+  endfor
+  ## The last bin only counts values equal to the final edge
+  b = (data == edges (end));
+  n (idx1 {:}, num_edges, idx2 {:}) = sum (b, dim);
+  if (nargout > 1)
+    idx (b) = num_edges;
+  endif
+endfunction
+
+%!test
+%! data = linspace (0, 10, 1001);  # 1001 evenly spaced points on [0, 10]
+%! n = histc (data, 0:10);  # unit-width bins, default dimension
+%! assert (n, [repmat(100, 1, 10), 1]);
+
+%!test
+%! data = repmat (linspace (0, 10, 1001), [2, 1, 3]);  # 2x1001x3 array
+%! n = histc (data, 0:10, 2);  # count along the second dimension
+%! assert (n, repmat ([repmat(100, 1, 10), 1], [2, 1, 3]));
+