Mercurial > octave-nkf
comparison scripts/statistics/base/histc.m @ 11436:e151e23f73bc
Overhaul base statistics functions and documentation of same.
Add or improve input validation.
Add input validation tests.
Add functional tests.
Improve or re-write documentation strings.
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Mon, 03 Jan 2011 21:23:08 -0800 |
parents | fe3c3dfc07eb |
children | fd0a3ac60b0e |
comparison
equal
deleted
inserted
replaced
11435:20f53b3a558f | 11436:e151e23f73bc |
---|---|
16 ## You should have received a copy of the GNU General Public License | 16 ## You should have received a copy of the GNU General Public License |
17 ## along with Octave; see the file COPYING. If not, see | 17 ## along with Octave; see the file COPYING. If not, see |
18 ## <http://www.gnu.org/licenses/>. | 18 ## <http://www.gnu.org/licenses/>. |
19 | 19 |
20 ## -*- texinfo -*- | 20 ## -*- texinfo -*- |
21 ## @deftypefn {Function File} {@var{n} =} histc (@var{y}, @var{edges}) | 21 ## @deftypefn {Function File} {@var{n} =} histc (@var{x}, @var{edges}) |
22 ## @deftypefnx {Function File} {@var{n} =} histc (@var{y}, @var{edges}, @var{dim}) | 22 ## @deftypefnx {Function File} {@var{n} =} histc (@var{x}, @var{edges}, @var{dim}) |
23 ## @deftypefnx {Function File} {[@var{n}, @var{idx}] =} histc (@dots{}) | 23 ## @deftypefnx {Function File} {[@var{n}, @var{idx}] =} histc (@dots{}) |
24 ## Produce histogram counts. | 24 ## Produce histogram counts. |
25 ## | 25 ## |
26 ## When @var{y} is a vector, the function counts the number of elements of | 26 ## When @var{x} is a vector, the function counts the number of elements of |
27 ## @var{y} that fall in the histogram bins defined by @var{edges}. This must be | 27 ## @var{x} that fall in the histogram bins defined by @var{edges}. This must be |
28 ## a vector of monotonically non-decreasing values that define the edges of the | 28 ## a vector of monotonically increasing values that define the edges of the |
29 ## histogram bins. So, @code{@var{n} (k)} contains the number of elements in | 29 ## histogram bins. @code{@var{n}(k)} contains the number of elements in |
30 ## @var{y} for which @code{@var{edges} (k) <= @var{y} < @var{edges} (k+1)}. | 30 ## @var{x} for which @code{@var{edges}(k) <= @var{x} < @var{edges}(k+1)}. |
31 ## The final element of @var{n} contains the number of elements of @var{y} | 31 ## The final element of @var{n} contains the number of elements of @var{x} |
32 ## that was equal to the last element of @var{edges}. | 32 ## exactly equal to the last element of @var{edges}. |
33 ## | 33 ## |
34 ## When @var{y} is a @math{N}-dimensional array, the same operation as above is | 34 ## When @var{x} is an @math{N}-dimensional array, the computation is |
35 ## repeated along dimension @var{dim}. If not specified @var{dim} defaults | 35 ## carried out along dimension @var{dim}. If not specified @var{dim} defaults |
36 ## to the first non-singleton dimension. | 36 ## to the first non-singleton dimension. |
37 ## | 37 ## |
38 ## If a second output argument is requested an index matrix is also returned. | 38 ## When a second output argument is requested an index matrix is also returned. |
39 ## The @var{idx} matrix has same size as @var{y}. Each element of @var{idx} | 39 ## The @var{idx} matrix has the same size as @var{x}. Each element of @var{idx} |
40 ## contains the index of the histogram bin in which the corresponding element | 40 ## contains the index of the histogram bin in which the corresponding element |
41 ## of @var{y} was counted. | 41 ## of @var{x} was counted. |
42 ## | |
43 ## @seealso{hist} | 42 ## @seealso{hist} |
44 ## @end deftypefn | 43 ## @end deftypefn |
45 | 44 |
46 function [n, idx] = histc (data, edges, dim) | 45 function [n, idx] = histc (x, edges, dim) |
47 ## Check input | 46 |
48 if (nargin < 2 || nargin > 3) | 47 if (nargin < 2 || nargin > 3) |
49 print_usage (); | 48 print_usage (); |
50 endif | 49 endif |
51 | 50 |
52 if (!isreal (data)) | 51 if (!isreal (x)) |
53 error ("histc: Y argument must be real-valued, not complex"); | 52 error ("histc: X argument must be real-valued, not complex"); |
54 endif | 53 endif |
55 | 54 |
56 num_edges = numel (edges); | 55 num_edges = numel (edges); |
57 if (num_edges == 0) | 56 if (num_edges == 0) |
58 error ("histc: EDGES must not be empty") | 57 error ("histc: EDGES must not be empty") |
61 if (!isreal (edges)) | 60 if (!isreal (edges)) |
62 error ("histc: EDGES must be real-valued, not complex"); | 61 error ("histc: EDGES must be real-valued, not complex"); |
63 else | 62 else |
64 ## Make sure 'edges' is sorted | 63 ## Make sure 'edges' is sorted |
65 edges = edges (:); | 64 edges = edges (:); |
66 if (! issorted (edges) || edges(1) > edges(end)) | 65 if (!issorted (edges) || edges(1) > edges(end)) |
67 warning ("histc: edge values not sorted on input"); | 66 warning ("histc: edge values not sorted on input"); |
68 edges = sort (edges); | 67 edges = sort (edges); |
69 endif | 68 endif |
70 endif | 69 endif |
71 | 70 |
72 nd = ndims (data); | 71 nd = ndims (x); |
73 sz = size (data); | 72 sz = size (x); |
74 if (nargin < 3) | 73 if (nargin < 3) |
75 ## Find the first non-singleton dimension. | 74 ## Find the first non-singleton dimension. |
76 dim = find (sz > 1, 1); | 75 dim = find (sz > 1, 1); |
77 if (isempty (dim)) | 76 if (isempty (dim)) |
78 dim = 1; | 77 dim = 1; |
79 endif | 78 endif |
80 else | 79 else |
81 if (!(isscalar (dim) && dim == round (dim)) | 80 if (!(isscalar (dim) && dim == fix (dim)) |
82 || !(1 <= dim && dim <= nd)) | 81 || !(1 <= dim && dim <= nd)) |
83 error ("histc: DIM must be an integer and a valid dimension"); | 82 error ("histc: DIM must be an integer and a valid dimension"); |
84 endif | 83 endif |
85 endif | 84 endif |
86 | 85 |
87 nsz = sz; | 86 nsz = sz; |
88 nsz (dim) = num_edges; | 87 nsz(dim) = num_edges; |
89 | 88 |
90 ## the splitting point is 3 bins | 89 ## the splitting point is 3 bins |
91 | 90 |
92 if (num_edges <= 3) | 91 if (num_edges <= 3) |
93 | 92 |
102 endif | 101 endif |
103 | 102 |
104 ## Prepare indices | 103 ## Prepare indices |
105 idx1 = cell (1, dim-1); | 104 idx1 = cell (1, dim-1); |
106 for k = 1:length (idx1) | 105 for k = 1:length (idx1) |
107 idx1 {k} = 1:sz (k); | 106 idx1 {k} = 1:sz(k); |
108 endfor | 107 endfor |
109 idx2 = cell (length (sz) - dim); | 108 idx2 = cell (length (sz) - dim); |
110 for k = 1:length (idx2) | 109 for k = 1:length (idx2) |
111 idx2 {k} = 1:sz (k+dim); | 110 idx2 {k} = 1:sz(k+dim); |
112 endfor | 111 endfor |
113 | 112 |
114 ## Compute the histograms | 113 ## Compute the histograms |
115 for k = 1:num_edges-1 | 114 for k = 1:num_edges-1 |
116 b = (edges (k) <= data & data < edges (k+1)); | 115 b = (edges (k) <= x & x < edges (k+1)); |
117 n (idx1 {:}, k, idx2 {:}) = sum (b, dim); | 116 n (idx1 {:}, k, idx2 {:}) = sum (b, dim); |
118 if (nargout > 1) | 117 if (nargout > 1) |
119 idx (b) = k; | 118 idx (b) = k; |
120 endif | 119 endif |
121 endfor | 120 endfor |
122 b = (data == edges (end)); | 121 b = (x == edges (end)); |
123 n (idx1 {:}, num_edges, idx2 {:}) = sum (b, dim); | 122 n (idx1 {:}, num_edges, idx2 {:}) = sum (b, dim); |
124 if (nargout > 1) | 123 if (nargout > 1) |
125 idx (b) = num_edges; | 124 idx (b) = num_edges; |
126 endif | 125 endif |
127 | 126 |
128 else | 127 else |
129 | 128 |
130 ## This is the O(M*log(N) + N) algorithm. | 129 ## This is the O(M*log(N) + N) algorithm. |
131 | 130 |
132 ## Look-up indices. | 131 ## Look-up indices. |
133 idx = lookup (edges, data); | 132 idx = lookup (edges, x); |
134 ## Zero invalid ones (including NaNs). data < edges(1) are already zero. | 133 ## Zero invalid ones (including NaNs). x < edges(1) are already zero. |
135 idx(! (data <= edges(end))) = 0; | 134 idx(! (x <= edges(end))) = 0; |
136 | 135 |
137 ## Don't accumulate the histogram if not needed. In that case, | 136 iidx = idx; |
138 ## histc() is just a (Matlab-compatible) wrapper for lookup. | |
139 if (isargout (1)) | |
140 iidx = idx; | |
141 | 137 |
142 ## In case of matrix input, we adjust the indices. | 138 ## In case of matrix input, we adjust the indices. |
143 if (! isvector (data)) | 139 if (! isvector (x)) |
144 nl = prod (sz(1:dim-1)); | 140 nl = prod (sz(1:dim-1)); |
145 nn = sz(dim); | 141 nn = sz(dim); |
146 nu = prod (sz(dim+1:end)); | 142 nu = prod (sz(dim+1:end)); |
147 if (nl != 1) | 143 if (nl != 1) |
148 iidx = (iidx-1) * nl; | 144 iidx = (iidx-1) * nl; |
149 iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz); | 145 iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz); |
150 endif | |
151 if (nu != 1) | |
152 ne =length (edges); | |
153 iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz); | |
154 endif | |
155 endif | 146 endif |
147 if (nu != 1) | |
148 ne =length (edges); | |
149 iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz); | |
150 endif | |
151 endif | |
156 | 152 |
157 ## Select valid elements. | 153 ## Select valid elements. |
158 iidx = iidx(idx != 0); | 154 iidx = iidx(idx != 0); |
159 | 155 |
160 ## Call accumarray to sum the indexed elements. | 156 ## Call accumarray to sum the indexed elements. |
161 n = accumarray (iidx(:), 1, nsz); | 157 n = accumarray (iidx(:), 1, nsz); |
162 endif | |
163 | 158 |
164 endif | 159 endif |
165 | 160 |
166 endfunction | 161 endfunction |
167 | 162 |
168 %!test | 163 %!test |
169 %! data = linspace (0, 10, 1001); | 164 %! x = linspace (0, 10, 1001); |
170 %! n = histc (data, 0:10); | 165 %! n = histc (x, 0:10); |
171 %! assert (n, [repmat(100, 1, 10), 1]); | 166 %! assert (n, [repmat(100, 1, 10), 1]); |
172 | 167 |
173 %!test | 168 %!test |
174 %! data = repmat (linspace (0, 10, 1001), [2, 1, 3]); | 169 %! x = repmat (linspace (0, 10, 1001), [2, 1, 3]); |
175 %! n = histc (data, 0:10, 2); | 170 %! n = histc (x, 0:10, 2); |
176 %! assert (n, repmat ([repmat(100, 1, 10), 1], [2, 1, 3])); | 171 %! assert (n, repmat ([repmat(100, 1, 10), 1], [2, 1, 3])); |
177 | 172 |
173 %!error histc (); | |
174 %!error histc (1); | |
175 %!error histc (1, 2, 3, 4); | |
176 %!error histc ([1:10 1+i], 2); | |
177 %!error histc (1:10, []); | |
178 %!error histc (1, 1, 3); |