Mercurial > octave-libgccjit
annotate scripts/statistics/base/histc.m @ 8937:f27b2c95817f
slightly simplify histc
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Mon, 09 Mar 2009 16:03:17 +0100 |
parents | cae073411b03 |
children | 1bf0ce0930be |
rev | line source |
---|---|
8932 | 1 ## Copyright (C) 2009, Søren Hauberg |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
2 ## Copyright (C) 2009 VZLU Prague |
8932 | 3 ## |
4 ## This file is part of Octave. | |
5 ## | |
6 ## Octave is free software; you can redistribute it and/or modify it | |
7 ## under the terms of the GNU General Public License as published by | |
8933
346fde2030b5
scripts/statistics/base/histc.m: update copyright notice to match the rest of Octave
Soren Hauberg <hauberg@gmail.com>
parents:
8932
diff
changeset
|
8 ## the Free Software Foundation; either version 3 of the License, or (at |
346fde2030b5
scripts/statistics/base/histc.m: update copyright notice to match the rest of Octave
Soren Hauberg <hauberg@gmail.com>
parents:
8932
diff
changeset
|
9 ## your option) any later version. |
8932 | 10 ## |
11 ## Octave is distributed in the hope that it will be useful, but | |
12 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 ## General Public License for more details. | |
15 ## | |
16 ## You should have received a copy of the GNU General Public License | |
8933
346fde2030b5
scripts/statistics/base/histc.m: update copyright notice to match the rest of Octave
Soren Hauberg <hauberg@gmail.com>
parents:
8932
diff
changeset
|
17 ## along with Octave; see the file COPYING. If not, see |
346fde2030b5
scripts/statistics/base/histc.m: update copyright notice to match the rest of Octave
Soren Hauberg <hauberg@gmail.com>
parents:
8932
diff
changeset
|
18 ## <http://www.gnu.org/licenses/>. |
8932 | 19 |
20 ## -*- texinfo -*- | |
21 ## @deftypefn {Function File} {@var{n} =} histc (@var{y}, @var{edges}) | |
22 ## @deftypefnx {Function File} {@var{n} =} histc (@var{y}, @var{edges}, @var{dim}) | |
23 ## @deftypefnx {Function File} {[@var{n}, @var{idx}] =} histc (...) | |
24 ## Produce histogram counts. | |
25 ## | |
26 ## When @var{y} is a vector, the function counts the number of elements of | |
27 ## @var{y} that fall in the histogram bins defined by @var{edges}. This must be | |
28 ## a vector of monotonically non-decreasing values that define the edges of the | |
29 ## histogram bins. So, @code{@var{n} (k)} contains the number of elements in | |
30 ## @var{y} for which @code{@var{edges} (k) <= @var{y} < @var{edges} (k+1)}. | |
31 ## The final element of @var{n} contains the number of elements of @var{y} | |
32 ## that was equal to the last element of @var{edges}. | |
33 ## | |
34 ## When @var{y} is a @math{N}-dimensional array, the same operation as above is | |
35 ## repeated along dimension @var{dim}. If this argument is given, the operation | |
36 ## is performed along the first non-singleton dimension. | |
37 ## | |
38 ## If a second output argument is requested an index matrix is also returned. | |
39 ## The @var{idx} matrix has same size as @var{y}. Each element of @var{idx} | |
40 ## contains the index of the histogram bin in which the corresponding element | |
41 ## of @var{y} was counted. | |
42 ## | |
43 ## @seealso{hist} | |
44 ## @end deftypefn | |
45 | |
46 function [n, idx] = histc (data, edges, dim) | |
47 ## Check input | |
48 if (nargin < 2) | |
49 print_usage (); | |
50 endif | |
51 | |
52 sz = size (data); | |
53 if (nargin < 3) | |
54 dim = find (sz > 1, 1); | |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
55 if (isempty (dim)) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
56 dim = 1; |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
57 endif |
8932 | 58 endif |
59 | |
60 if (!isreal (data)) | |
61 error ("histc: first argument must be real a vector"); | |
62 endif | |
63 | |
64 ## Make sure 'edges' is sorted | |
65 num_edges = numel (edges); | |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
66 if (num_edges == 0) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
67 error ("histc: edges must not be empty") |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
68 endif |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
69 |
8932 | 70 if (isreal (edges)) |
71 edges = edges (:); | |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
72 if (! issorted (edges) || edges(1) > edges(end)) |
8932 | 73 warning ("histc: edge values not sorted on input"); |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
74 edges = sort (edges); |
8932 | 75 endif |
76 else | |
77 error ("histc: second argument must be a vector"); | |
78 endif | |
79 | |
80 nsz = sz; | |
81 nsz (dim) = num_edges; | |
82 | |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
83 ## the splitting point is 3 bins |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
84 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
85 if (num_edges <= 3) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
86 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
87 ## This is the O(M*N) algorithm. |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
88 |
8937
f27b2c95817f
slightly simplify histc
Jaroslav Hajek <highegg@gmail.com>
parents:
8935
diff
changeset
|
89 ## Allocate the histogram |
f27b2c95817f
slightly simplify histc
Jaroslav Hajek <highegg@gmail.com>
parents:
8935
diff
changeset
|
90 n = zeros (nsz); |
f27b2c95817f
slightly simplify histc
Jaroslav Hajek <highegg@gmail.com>
parents:
8935
diff
changeset
|
91 |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
92 ## Allocate 'idx' |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
93 if (nargout > 1) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
94 idx = zeros (sz); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
95 endif |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
96 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
97 ## Prepare indices |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
98 idx1 = cell (1, dim-1); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
99 for k = 1:length (idx1) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
100 idx1 {k} = 1:sz (k); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
101 endfor |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
102 idx2 = cell (length (sz) - dim); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
103 for k = 1:length (idx2) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
104 idx2 {k} = 1:sz (k+dim); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
105 endfor |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
106 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
107 ## Compute the histograms |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
108 for k = 1:num_edges-1 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
109 b = (edges (k) <= data & data < edges (k+1)); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
110 n (idx1 {:}, k, idx2 {:}) = sum (b, dim); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
111 if (nargout > 1) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
112 idx (b) = k; |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
113 endif |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
114 endfor |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
115 b = (data == edges (end)); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
116 n (idx1 {:}, num_edges, idx2 {:}) = sum (b, dim); |
8932 | 117 if (nargout > 1) |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
118 idx (b) = num_edges; |
8932 | 119 endif |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
120 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
121 else |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
122 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
123 ## This is the O(M*log(N) + N) algorithm. |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
124 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
125 ## Look-up indices. |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
126 idx = lookup (edges, data); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
127 ## Zero invalid ones (including NaNs). data < edges(1) are already zero. |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
128 idx(! (data <= edges(end))) = 0; |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
129 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
130 iidx = idx; |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
131 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
132 ## In case of matrix input, we adjust the indices. |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
133 if (! isvector (data)) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
134 nl = prod (sz(1:dim-1)); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
135 nn = sz(dim); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
136 nu = prod (sz(dim+1:end)); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
137 if (nl != 1) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
138 iidx = (iidx-1) * nl; |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
139 iidx += reshape (kron (ones (1, nn*nu), 1:nl), sz); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
140 endif |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
141 if (nu != 1) |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
142 ne =length (edges); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
143 iidx += reshape (kron (nl*ne*(0:nu-1), ones (1, nl*nn)), sz); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
144 endif |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
145 endif |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
146 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
147 ## Select valid elements. |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
148 iidx = iidx(idx != 0); |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
149 |
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
150 ## Call accumarray to sum the indexed elements. |
8937
f27b2c95817f
slightly simplify histc
Jaroslav Hajek <highegg@gmail.com>
parents:
8935
diff
changeset
|
151 n = accumarray (iidx(:), 1, nsz); |
8935
cae073411b03
optimize histc implementation
Jaroslav Hajek <highegg@gmail.com>
parents:
8933
diff
changeset
|
152 |
8932 | 153 endif |
154 | |
155 endfunction | |
156 | |
157 %!test | |
158 %! data = linspace (0, 10, 1001); | |
159 %! n = histc (data, 0:10); | |
160 %! assert (n, [repmat(100, 1, 10), 1]); | |
161 | |
162 %!test | |
163 %! data = repmat (linspace (0, 10, 1001), [2, 1, 3]); | |
164 %! n = histc (data, 0:10, 2); | |
165 %! assert (n, repmat ([repmat(100, 1, 10), 1], [2, 1, 3])); | |
166 |