Mercurial > octave-nkf
comparison scripts/statistics/distributions/binoinv.m @ 20642:9d2023d1a63c
binoinv.m: Implement binary search algorithm for 28X performance increase (bug #34363).
* binoinv.m: Call new functions scalar_binoinv or vector_binoinv to calculate
binoinv. If there are still uncalculated values then call bin_search_binoinv
to perform binary search for remaining values. Add more BIST tests.
* binoinv.m (scalar_binoinv): New subfunction to calculate binoinv for scalar
n and p (x may be a vector). Stops once the search limit reaches 1000.
* binoinv.m (vector_binoinv): New subfunction to calculate binoinv for vector
n and p. Stops once the search limit reaches 1000.
author | Lachlan Andrew <lachlanbis@gmail.com> |
---|---|
date | Sun, 11 Oct 2015 19:49:40 -0700 |
parents | d9341b422488 |
children | 4e307c55a2b5 |
comparison
equal
deleted
inserted
replaced
20641:c3c052b9192a | 20642:9d2023d1a63c |
---|---|
1 ## Copyright (C) 2012 Rik Wehbring | 1 ## Copyright (C) 2015 Lachlan Andrew |
2 ## Copyright (C) 1995-2015 Kurt Hornik | 2 ## Copyright (C) 2012-2015 Rik Wehbring |
3 ## Copyright (C) 1995-2012 Kurt Hornik | |
3 ## | 4 ## |
4 ## This file is part of Octave. | 5 ## This file is part of Octave. |
5 ## | 6 ## |
6 ## Octave is free software; you can redistribute it and/or modify it | 7 ## Octave is free software; you can redistribute it and/or modify it |
7 ## under the terms of the GNU General Public License as published by | 8 ## under the terms of the GNU General Public License as published by |
22 ## For each element of @var{x}, compute the quantile (the inverse of the CDF) | 23 ## For each element of @var{x}, compute the quantile (the inverse of the CDF) |
23 ## at @var{x} of the binomial distribution with parameters | 24 ## at @var{x} of the binomial distribution with parameters |
24 ## @var{n} and @var{p}, where @var{n} is the number of trials and | 25 ## @var{n} and @var{p}, where @var{n} is the number of trials and |
25 ## @var{p} is the probability of success. | 26 ## @var{p} is the probability of success. |
26 ## @end deftypefn | 27 ## @end deftypefn |
27 | |
28 ## Author: KH <Kurt.Hornik@wu-wien.ac.at> | |
29 ## Description: Quantile function of the binomial distribution | |
30 | 28 |
31 function inv = binoinv (x, n, p) | 29 function inv = binoinv (x, n, p) |
32 | 30 |
33 if (nargin != 3) | 31 if (nargin != 3) |
34 print_usage (); | 32 print_usage (); |
56 inv(k) = NaN; | 54 inv(k) = NaN; |
57 | 55 |
58 k = find ((x >= 0) & (x <= 1) & (n >= 0) & (n == fix (n) | 56 k = find ((x >= 0) & (x <= 1) & (n >= 0) & (n == fix (n) |
59 & (p >= 0) & (p <= 1))); | 57 & (p >= 0) & (p <= 1))); |
60 if (any (k)) | 58 if (any (k)) |
59 x = x(k); | |
61 if (isscalar (n) && isscalar (p)) | 60 if (isscalar (n) && isscalar (p)) |
62 cdf = binopdf (0, n, p) * ones (size (k)); | 61 [inv(k), unfinished] = scalar_binoinv (x(:), n, p); |
63 while (any (inv(k) < n)) | 62 k = k(unfinished); |
64 m = find (cdf < x(k)); | 63 if (! isempty (k)) |
65 if (any (m)) | 64 inv(k) = bin_search_binoinv (x(k), n, p); |
66 inv(k(m)) = inv(k(m)) + 1; | 65 endif |
67 cdf(m) = cdf(m) + binopdf (inv(k(m)), n, p); | |
68 else | |
69 break; | |
70 endif | |
71 endwhile | |
72 else | 66 else |
73 cdf = binopdf (0, n(k), p(k)); | 67 [inv(k), unfinished] = vector_binoinv (x(:), n(:), p(:)); |
74 while (any (inv(k) < n(k))) | 68 k = k(unfinished); |
75 m = find (cdf < x(k)); | 69 if (! isempty (k)) |
76 if (any (m)) | 70 inv(k) = bin_search_binoinv (x(k), n(k), p(k)); |
77 inv(k(m)) = inv(k(m)) + 1; | 71 endif |
78 cdf(m) = cdf(m) + binopdf (inv(k(m)), n(k(m)), p(k(m))); | |
79 else | |
80 break; | |
81 endif | |
82 endwhile | |
83 endif | 72 endif |
84 endif | 73 endif |
85 | 74 |
75 endfunction | |
76 | |
77 ## Core algorithm to calculate the inverse binomial, for n and p real scalars | |
78 ## and x a column vector, and for which the output is not NaN or Inf. | |
79 ## Compute CDF in batches of doubling size until CDF >= x, or limit >= 1000 | |
80 ## Return the locations of unfinished cases in k. | |
81 function [m, k] = scalar_binoinv (x, n, p) | |
82 k = 1:length (x); | |
83 m = zeros (size (x)); | |
84 prev_limit = 0; | |
85 limit = 10; | |
86 cdf = 0; | |
87 v = 0; | |
88 do | |
89 cdf = binocdf (prev_limit:limit-1, n, p); | |
90 r = bsxfun (@le, x(k), cdf); | |
91 [v, m(k)] = max (r, [], 2); # find first instance of x <= cdf | |
92 m(k) += prev_limit - 1; | |
93 k = k(v == 0); | |
94 | |
95 prev_limit = limit; | |
96 limit += limit; | |
97 until (isempty (k) || limit >= 1000) | |
98 | |
99 endfunction | |
100 | |
101 ## Core algorithm to calculate the inverse binomial, for n, p, and x column | |
102 ## vectors, and for which the output is not NaN or Inf. | |
103 ## Compute CDF in batches of growing size until CDF >= x, or limit >= 1000 | |
104 ## Return the locations of unfinished cases in k. | |
105 ## Calculates CDF by summing PDF, which is faster than calls to binocdf. | |
106 function [m, k] = vector_binoinv (x, n, p) | |
107 k = 1:length(x); | |
108 m = zeros (size (x)); | |
109 prev_limit = 0; | |
110 limit = 10; | |
111 cdf = 0; | |
112 v = 0; | |
113 do | |
114 xx = repmat (prev_limit:limit-1, [length(k), 1]); | |
115 nn = kron (ones (1, limit-prev_limit), n(k)); | |
116 pp = kron (ones (1, limit-prev_limit), p(k)); | |
117 pdf = binopdf (xx, nn, pp); | |
118 pdf(:,1) += cdf(v==0, end); | |
119 cdf = cumsum (pdf, 2); | |
120 r = bsxfun (@le, x(k), cdf); | |
121 [v, m(k)] = max (r, [], 2); # find first instance of x <= cdf | |
122 m(k) += prev_limit - 1; | |
123 k = k(v == 0); | |
124 | |
125 prev_limit = limit; | |
126 limit += min (limit, max (1e4/numel (k), 10)); # limit memory use | |
127 until (isempty (k) || limit >= 1000) | |
128 | |
129 endfunction | |
130 | |
131 ## Vectorized binary search. | |
132 ## Can handle vectors n and p, and is faster than the scalar case when the | |
133 ## answer is large. | |
134 ## Could be optimized to call binocdf only for a subset of the x at each stage, | |
135 ## but care must be taken to handle both scalar and vector n, p. Bookkeeping | |
136 ## may cost more than the extra computations. | |
137 function m = bin_search_binoinv (x, n, p) | |
138 k = 1:length (x); | |
139 lower = zeros (size (x)); | |
140 limit = 500; # lower bound on point at which prev phase finished | |
141 while (any (k) && limit < 1e100) | |
142 cdf = binocdf (limit, n, p); | |
143 k = (x > cdf); | |
144 lower(k) = limit; | |
145 limit += limit; | |
146 end | |
147 upper = max (2*lower, 1); | |
148 k = find (lower != limit/2); # elements for which above loop finished | |
149 for i = 1:ceil (log2 (max (lower))) | |
150 mid = (upper + lower)/2; | |
151 cdf = binocdf (floor(mid(:)), n, p); | |
152 r = (x <= cdf); | |
153 upper(r) = mid(r); | |
154 lower(!r) = mid(!r); | |
155 endfor | |
156 m = ceil (lower); | |
157 m(x > binocdf (m(:), n, p)) += 1; # fix off-by-one errors from binary search | |
86 endfunction | 158 endfunction |
87 | 159 |
88 | 160 |
89 %!shared x | 161 %!shared x |
90 %! x = [-1 0 0.5 1 2]; | 162 %! x = [-1 0 0.5 1 2]; |
99 %!assert (binoinv ([x, NaN], 2, 0.5), [NaN 0 1 2 NaN NaN]) | 171 %!assert (binoinv ([x, NaN], 2, 0.5), [NaN 0 1 2 NaN NaN]) |
100 %!assert (binoinv (single ([x, NaN]), 2, 0.5), single ([NaN 0 1 2 NaN NaN])) | 172 %!assert (binoinv (single ([x, NaN]), 2, 0.5), single ([NaN 0 1 2 NaN NaN])) |
101 %!assert (binoinv ([x, NaN], single (2), 0.5), single ([NaN 0 1 2 NaN NaN])) | 173 %!assert (binoinv ([x, NaN], single (2), 0.5), single ([NaN 0 1 2 NaN NaN])) |
102 %!assert (binoinv ([x, NaN], 2, single (0.5)), single ([NaN 0 1 2 NaN NaN])) | 174 %!assert (binoinv ([x, NaN], 2, single (0.5)), single ([NaN 0 1 2 NaN NaN])) |
103 | 175 |
176 ## Test accuracy, to within +/- 1 since it is a discrete distribution | |
177 %!shared y, tol | |
178 %! y = magic (3) + 1; | |
179 %! tol = 1; | |
180 %!assert (binoinv (binocdf (1:10, 11, 0.1), 11, 0.1), 1:10, tol) | |
181 %!assert (binoinv (binocdf (1:10, 2*(1:10), 0.1), 2*(1:10), 0.1), 1:10, tol) | |
182 %!assert (binoinv (binocdf (y, 2*y, 1./y), 2*y, 1./y), y, tol) | |
183 | |
104 ## Test input validation | 184 ## Test input validation |
105 %!error binoinv () | 185 %!error binoinv () |
106 %!error binoinv (1) | 186 %!error binoinv (1) |
107 %!error binoinv (1,2) | 187 %!error binoinv (1,2) |
108 %!error binoinv (1,2,3,4) | 188 %!error binoinv (1,2,3,4) |