comparison scripts/statistics/base/histc.m @ 11436:e151e23f73bc

Overhaul base statistics functions and documentation of same. Add or improve input validation. Add input validation tests. Add functional tests. Improve or re-write documentation strings.
author Rik <octave@nomad.inbox5.com>
date Mon, 03 Jan 2011 21:23:08 -0800
parents fe3c3dfc07eb
children fd0a3ac60b0e
comparison
equal deleted inserted replaced
11435:20f53b3a558f 11436:e151e23f73bc
16 ## You should have received a copy of the GNU General Public License 16 ## You should have received a copy of the GNU General Public License
17 ## along with Octave; see the file COPYING. If not, see 17 ## along with Octave; see the file COPYING. If not, see
18 ## <http://www.gnu.org/licenses/>. 18 ## <http://www.gnu.org/licenses/>.
19 19
20 ## -*- texinfo -*- 20 ## -*- texinfo -*-
21 ## @deftypefn {Function File} {@var{n} =} histc (@var{y}, @var{edges}) 21 ## @deftypefn {Function File} {@var{n} =} histc (@var{x}, @var{edges})
22 ## @deftypefnx {Function File} {@var{n} =} histc (@var{y}, @var{edges}, @var{dim}) 22 ## @deftypefnx {Function File} {@var{n} =} histc (@var{x}, @var{edges}, @var{dim})
23 ## @deftypefnx {Function File} {[@var{n}, @var{idx}] =} histc (@dots{}) 23 ## @deftypefnx {Function File} {[@var{n}, @var{idx}] =} histc (@dots{})
24 ## Produce histogram counts. 24 ## Produce histogram counts.
25 ## 25 ##
26 ## When @var{y} is a vector, the function counts the number of elements of 26 ## When @var{x} is a vector, the function counts the number of elements of
27 ## @var{y} that fall in the histogram bins defined by @var{edges}. This must be 27 ## @var{x} that fall in the histogram bins defined by @var{edges}. This must be
28 ## a vector of monotonically non-decreasing values that define the edges of the 28 ## a vector of monotonically increasing values that define the edges of the
29 ## histogram bins. So, @code{@var{n} (k)} contains the number of elements in 29 ## histogram bins. @code{@var{n}(k)} contains the number of elements in
30 ## @var{y} for which @code{@var{edges} (k) <= @var{y} < @var{edges} (k+1)}. 30 ## @var{x} for which @code{@var{edges}(k) <= @var{x} < @var{edges}(k+1)}.
31 ## The final element of @var{n} contains the number of elements of @var{y} 31 ## The final element of @var{n} contains the number of elements of @var{x}
32 ## that was equal to the last element of @var{edges}. 32 ## exactly equal to the last element of @var{edges}.
33 ## 33 ##
34 ## When @var{y} is a @math{N}-dimensional array, the same operation as above is 34 ## When @var{x} is an @math{N}-dimensional array, the computation is
35 ## repeated along dimension @var{dim}. If not specified @var{dim} defaults 35 ## carried out along dimension @var{dim}. If not specified @var{dim} defaults
36 ## to the first non-singleton dimension. 36 ## to the first non-singleton dimension.
37 ## 37 ##
38 ## If a second output argument is requested an index matrix is also returned. 38 ## When a second output argument is requested an index matrix is also returned.
39 ## The @var{idx} matrix has same size as @var{y}. Each element of @var{idx} 39 ## The @var{idx} matrix has the same size as @var{x}. Each element of @var{idx}
40 ## contains the index of the histogram bin in which the corresponding element 40 ## contains the index of the histogram bin in which the corresponding element
41 ## of @var{y} was counted. 41 ## of @var{x} was counted.
42 ##
43 ## @seealso{hist} 42 ## @seealso{hist}
44 ## @end deftypefn 43 ## @end deftypefn
45 44
46 function [n, idx] = histc (data, edges, dim) 45 function [n, idx] = histc (x, edges, dim)
47 ## Check input 46
48 if (nargin < 2 || nargin > 3) 47 if (nargin < 2 || nargin > 3)
49 print_usage (); 48 print_usage ();
50 endif 49 endif
51 50
52 if (!isreal (data)) 51 if (!isreal (x))
53 error ("histc: Y argument must be real-valued, not complex"); 52 error ("histc: X argument must be real-valued, not complex");
54 endif 53 endif
55 54
56 num_edges = numel (edges); 55 num_edges = numel (edges);
57 if (num_edges == 0) 56 if (num_edges == 0)
58 error ("histc: EDGES must not be empty") 57 error ("histc: EDGES must not be empty")
61 if (!isreal (edges)) 60 if (!isreal (edges))
62 error ("histc: EDGES must be real-valued, not complex"); 61 error ("histc: EDGES must be real-valued, not complex");
63 else 62 else
64 ## Make sure 'edges' is sorted 63 ## Make sure 'edges' is sorted
65 edges = edges (:); 64 edges = edges (:);
66 if (! issorted (edges) || edges(1) > edges(end)) 65 if (!issorted (edges) || edges(1) > edges(end))
67 warning ("histc: edge values not sorted on input"); 66 warning ("histc: edge values not sorted on input");
68 edges = sort (edges); 67 edges = sort (edges);
69 endif 68 endif
70 endif 69 endif
71 70
72 nd = ndims (data); 71 nd = ndims (x);
73 sz = size (data); 72 sz = size (x);
74 if (nargin < 3) 73 if (nargin < 3)
75 ## Find the first non-singleton dimension. 74 ## Find the first non-singleton dimension.
76 dim = find (sz > 1, 1); 75 dim = find (sz > 1, 1);
77 if (isempty (dim)) 76 if (isempty (dim))
78 dim = 1; 77 dim = 1;
79 endif 78 endif
80 else 79 else
81 if (!(isscalar (dim) && dim == round (dim)) 80 if (!(isscalar (dim) && dim == fix (dim))
82 || !(1 <= dim && dim <= nd)) 81 || !(1 <= dim && dim <= nd))
83 error ("histc: DIM must be an integer and a valid dimension"); 82 error ("histc: DIM must be an integer and a valid dimension");
84 endif 83 endif
85 endif 84 endif
86 85
87 nsz = sz; 86 nsz = sz;
88 nsz (dim) = num_edges; 87 nsz(dim) = num_edges;
89 88
90 ## the splitting point is 3 bins 89 ## the splitting point is 3 bins
91 90
92 if (num_edges <= 3) 91 if (num_edges <= 3)
93 92
102 endif 101 endif
103 102
104 ## Prepare indices 103 ## Prepare indices
105 idx1 = cell (1, dim-1); 104 idx1 = cell (1, dim-1);
106 for k = 1:length (idx1) 105 for k = 1:length (idx1)
107 idx1 {k} = 1:sz (k); 106 idx1 {k} = 1:sz(k);
108 endfor 107 endfor
109 idx2 = cell (length (sz) - dim); 108 idx2 = cell (length (sz) - dim);
110 for k = 1:length (idx2) 109 for k = 1:length (idx2)
111 idx2 {k} = 1:sz (k+dim); 110 idx2 {k} = 1:sz(k+dim);
112 endfor 111 endfor
113 112
114 ## Compute the histograms 113 ## Compute the histograms
115 for k = 1:num_edges-1 114 for k = 1:num_edges-1
116 b = (edges (k) <= data & data < edges (k+1)); 115 b = (edges (k) <= x & x < edges (k+1));
117 n (idx1 {:}, k, idx2 {:}) = sum (b, dim); 116 n (idx1 {:}, k, idx2 {:}) = sum (b, dim);
118 if (nargout > 1) 117 if (nargout > 1)
119 idx (b) = k; 118 idx (b) = k;
120 endif 119 endif
121 endfor 120 endfor
122 b = (data == edges (end)); 121 b = (x == edges (end));
123 n (idx1 {:}, num_edges, idx2 {:}) = sum (b, dim); 122 n (idx1 {:}, num_edges, idx2 {:}) = sum (b, dim);
124 if (nargout > 1) 123 if (nargout > 1)
125 idx (b) = num_edges; 124 idx (b) = num_edges;
126 endif 125 endif
127 126
128 else 127 else
129 128
130 ## This is the O(M*log(N) + N) algorithm. 129 ## This is the O(M*log(N) + N) algorithm.
131 130
132 ## Look-up indices. 131 ## Look-up indices.
133 idx = lookup (edges, data); 132 idx = lookup (edges, x);
134 ## Zero invalid ones (including NaNs). data < edges(1) are already zero. 133 ## Zero invalid ones (including NaNs). x < edges(1) are already zero.
135 idx(! (data <= edges(end))) = 0; 134 idx(! (x <= edges(end))) = 0;
136 135
137 ## Don't accumulate the histogram if not needed. In that case, 136 iidx = idx;
138 ## histc() is just a (Matlab-compatible) wrapper for lookup.
139 if (isargout (1))
140 iidx = idx;
141 137
142 ## In case of matrix input, we adjust the indices. 138 ## In case of matrix input, we adjust the indices.
143 if (! isvector (data)) 139 if (! isvector (x))
144 nl = prod (sz(1:dim-1)); 140 nl = prod (sz(1:dim-1));
145 nn = sz(dim); 141 nn = sz(dim);
146 nu = prod (sz(dim+1:end)); 142 nu = prod (sz(dim+1:end));
147 if (nl != 1) 143 if (nl != 1)
148 iidx = (iidx-1) * nl; 144 iidx = (iidx-1) * nl;
149 iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz); 145 iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz);
150 endif
151 if (nu != 1)
152 ne =length (edges);
153 iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz);
154 endif
155 endif 146 endif
147 if (nu != 1)
148 ne =length (edges);
149 iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz);
150 endif
151 endif
156 152
157 ## Select valid elements. 153 ## Select valid elements.
158 iidx = iidx(idx != 0); 154 iidx = iidx(idx != 0);
159 155
160 ## Call accumarray to sum the indexed elements. 156 ## Call accumarray to sum the indexed elements.
161 n = accumarray (iidx(:), 1, nsz); 157 n = accumarray (iidx(:), 1, nsz);
162 endif
163 158
164 endif 159 endif
165 160
166 endfunction 161 endfunction
167 162
168 %!test 163 %!test
169 %! data = linspace (0, 10, 1001); 164 %! x = linspace (0, 10, 1001);
170 %! n = histc (data, 0:10); 165 %! n = histc (x, 0:10);
171 %! assert (n, [repmat(100, 1, 10), 1]); 166 %! assert (n, [repmat(100, 1, 10), 1]);
172 167
173 %!test 168 %!test
174 %! data = repmat (linspace (0, 10, 1001), [2, 1, 3]); 169 %! x = repmat (linspace (0, 10, 1001), [2, 1, 3]);
175 %! n = histc (data, 0:10, 2); 170 %! n = histc (x, 0:10, 2);
176 %! assert (n, repmat ([repmat(100, 1, 10), 1], [2, 1, 3])); 171 %! assert (n, repmat ([repmat(100, 1, 10), 1], [2, 1, 3]));
177 172
173 %!error histc ();
174 %!error histc (1);
175 %!error histc (1, 2, 3, 4);
176 %!error histc ([1:10 1+i], 2);
177 %!error histc (1:10, []);
178 %!error histc (1, 1, 3);