comparison scripts/statistics/distributions/binoinv.m @ 20642:9d2023d1a63c

binoinv.m: Implement binary search algorithm for 28X performance increase (bug #34363). * binoinv.m: Call new functions scalar_binoinv or vector_binoinv to calculate binoinv. If there are still uncalculated values then call bin_search_binoinv to perform binary search for remaining values. Add more BIST tests. * binoinv.m (scalar_binoinv): New subfunction to calculate binoinv for scalar n and p. Stops when the search limit reaches 1000. * binoinv.m (vector_binoinv): New subfunction to calculate binoinv for vector n and p. Stops when the search limit reaches 1000.
author Lachlan Andrew <lachlanbis@gmail.com>
date Sun, 11 Oct 2015 19:49:40 -0700
parents d9341b422488
children 4e307c55a2b5
comparison
equal deleted inserted replaced
20641:c3c052b9192a 20642:9d2023d1a63c
1 ## Copyright (C) 2012 Rik Wehbring 1 ## Copyright (C) 2015 Lachlan Andrew
2 ## Copyright (C) 1995-2015 Kurt Hornik 2 ## Copyright (C) 2012-2015 Rik Wehbring
3 ## Copyright (C) 1995-2012 Kurt Hornik
3 ## 4 ##
4 ## This file is part of Octave. 5 ## This file is part of Octave.
5 ## 6 ##
6 ## Octave is free software; you can redistribute it and/or modify it 7 ## Octave is free software; you can redistribute it and/or modify it
7 ## under the terms of the GNU General Public License as published by 8 ## under the terms of the GNU General Public License as published by
22 ## For each element of @var{x}, compute the quantile (the inverse of the CDF) 23 ## For each element of @var{x}, compute the quantile (the inverse of the CDF)
23 ## at @var{x} of the binomial distribution with parameters 24 ## at @var{x} of the binomial distribution with parameters
24 ## @var{n} and @var{p}, where @var{n} is the number of trials and 25 ## @var{n} and @var{p}, where @var{n} is the number of trials and
25 ## @var{p} is the probability of success. 26 ## @var{p} is the probability of success.
26 ## @end deftypefn 27 ## @end deftypefn
27
28 ## Author: KH <Kurt.Hornik@wu-wien.ac.at>
29 ## Description: Quantile function of the binomial distribution
30 28
31 function inv = binoinv (x, n, p) 29 function inv = binoinv (x, n, p)
32 30
33 if (nargin != 3) 31 if (nargin != 3)
34 print_usage (); 32 print_usage ();
56 inv(k) = NaN; 54 inv(k) = NaN;
57 55
58 k = find ((x >= 0) & (x <= 1) & (n >= 0) & (n == fix (n) 56 k = find ((x >= 0) & (x <= 1) & (n >= 0) & (n == fix (n)
59 & (p >= 0) & (p <= 1))); 57 & (p >= 0) & (p <= 1)));
60 if (any (k)) 58 if (any (k))
59 x = x(k);
61 if (isscalar (n) && isscalar (p)) 60 if (isscalar (n) && isscalar (p))
62 cdf = binopdf (0, n, p) * ones (size (k)); 61 [inv(k), unfinished] = scalar_binoinv (x(:), n, p);
63 while (any (inv(k) < n)) 62 k = k(unfinished);
64 m = find (cdf < x(k)); 63 if (! isempty (k))
65 if (any (m)) 64 inv(k) = bin_search_binoinv (x(k), n, p);
66 inv(k(m)) = inv(k(m)) + 1; 65 endif
67 cdf(m) = cdf(m) + binopdf (inv(k(m)), n, p);
68 else
69 break;
70 endif
71 endwhile
72 else 66 else
73 cdf = binopdf (0, n(k), p(k)); 67 [inv(k), unfinished] = vector_binoinv (x(:), n(:), p(:));
74 while (any (inv(k) < n(k))) 68 k = k(unfinished);
75 m = find (cdf < x(k)); 69 if (! isempty (k))
76 if (any (m)) 70 inv(k) = bin_search_binoinv (x(k), n(k), p(k));
77 inv(k(m)) = inv(k(m)) + 1; 71 endif
78 cdf(m) = cdf(m) + binopdf (inv(k(m)), n(k(m)), p(k(m)));
79 else
80 break;
81 endif
82 endwhile
83 endif 72 endif
84 endif 73 endif
85 74
75 endfunction
76
77 ## Core algorithm to calculate the inverse binomial, for n and p real scalars
78 ## and x a column vector, and for which the output is not NaN or Inf.
79 ## Compute CDF in batches of doubling size until x <= CDF, or until limit
80 ## reaches 1000.  Return the locations of unfinished cases in k.
81 function [m, k] = scalar_binoinv (x, n, p)  # m: result per element of x; k: indices of x left unresolved
82 k = 1:length (x);  # indices of the elements of x not yet resolved
83 m = zeros (size (x));  # results, same size as x
84 prev_limit = 0;  # each batch covers the values prev_limit .. limit-1
85 limit = 10;
86 cdf = 0;  # overwritten inside the loop before it is read
87 v = 0;  # overwritten inside the loop before it is read
88 do
89 cdf = binocdf (prev_limit:limit-1, n, p);  # CDF at every value of the current batch
90 r = bsxfun (@le, x(k), cdf);  # r(i,j) is true where x(k(i)) <= cdf(j); relies on x being a column
91 [v, m(k)] = max (r, [], 2); # find first instance of x <= cdf
92 m(k) += prev_limit - 1;  # convert the column index into a value (column 1 is prev_limit)
93 k = k(v == 0);  # keep only the rows where no cdf entry reached x
94
95 prev_limit = limit;
96 limit += limit;  # double the limit for the next batch
97 until (isempty (k) || limit >= 1000)  # stop once everything is resolved or limit reaches 1000
98
99 endfunction
100
101 ## Core algorithm to calculate the inverse binomial, for n, p, and x column
102 ## vectors, and for which the output is not NaN or Inf.
103 ## Compute CDF in batches, at most doubling the limit each time, until
104 ## x <= CDF or limit reaches 1000.  Return the unfinished cases in k.
105 ## Calculates CDF by summing PDF, which is faster than calls to binocdf.
106 function [m, k] = vector_binoinv (x, n, p)  # m: result per element of x; k: indices of x left unresolved
107 k = 1:length(x);  # indices of the elements of x not yet resolved
108 m = zeros (size (x));  # results, same size as x
109 prev_limit = 0;  # each batch covers the values prev_limit .. limit-1
110 limit = 10;
111 cdf = 0;  # running CDF carried from one batch to the next; 0 before the first batch
112 v = 0;  # per-row "found" flag from the previous batch; 0 means not found
113 do
114 xx = repmat (prev_limit:limit-1, [length(k), 1]);  # one row per unresolved element, one column per batch value
115 nn = kron (ones (1, limit-prev_limit), n(k));  # n(k) repeated across the batch columns; relies on n being a column
116 pp = kron (ones (1, limit-prev_limit), p(k));  # p(k) repeated across the batch columns; relies on p being a column
117 pdf = binopdf (xx, nn, pp);
118 pdf(:,1) += cdf(v==0, end);  # carry in the last CDF column of the rows left unresolved by the previous batch
119 cdf = cumsum (pdf, 2);  # cumulative sum along each row gives the CDF over this batch
120 r = bsxfun (@le, x(k), cdf);  # r(i,j) is true where x(k(i)) <= cdf(i,j)
121 [v, m(k)] = max (r, [], 2); # find first instance of x <= cdf
122 m(k) += prev_limit - 1;  # convert the column index into a value (column 1 is prev_limit)
123 k = k(v == 0);  # keep only the rows where no cdf entry reached x
124
125 prev_limit = limit;
126 limit += min (limit, max (1e4/numel (k), 10)); # limit memory use; NOTE(review): 1e4/numel (k) need not be an integer, so limit can become fractional -- confirm this is intended
127 until (isempty (k) || limit >= 1000)  # stop once everything is resolved or limit reaches 1000
128
129 endfunction
130
131 ## Vectorized binary search.
132 ## Can handle vectors n and p, and is faster than the scalar case when the
133 ## answer is large.
134 ## Could be optimized to call binocdf only for a subset of the x at each stage,
135 ## but care must be taken to handle both scalar and vector n, p. Bookkeeping
136 ## may cost more than the extra computations.
137 function m = bin_search_binoinv (x, n, p)  # m: result for each element of x
138 k = 1:length (x);  # all indices at first, so the loop below is entered when x is non-empty
139 lower = zeros (size (x));  # per-element lower bracket; stays 0 unless raised below
140 limit = 500; # lower bound on point at which prev phase finished
141 while (any (k) && limit < 1e100)  # doubling phase: raise limit until no x exceeds the CDF at limit
142 cdf = binocdf (limit, n, p);
143 k = (x > cdf);  # logical mask of the elements whose x is still above the CDF at limit
144 lower(k) = limit;  # record limit as the lower bracket for those elements
145 limit += limit;
146 end
147 upper = max (2*lower, 1);  # upper bracket: twice the lower bracket, and at least 1
148 k = find (lower != limit/2); # elements for which above loop finished; NOTE(review): k is not read again in this function
149 for i = 1:ceil (log2 (max (lower)))  # bisection passes; count is derived from the largest lower bracket
150 mid = (upper + lower)/2;
151 cdf = binocdf (floor(mid(:)), n, p);  # CDF at the integer part of each midpoint
152 r = (x <= cdf);  # true where the CDF at the midpoint already reaches x
153 upper(r) = mid(r);  # move the upper bracket down to the midpoint
154 lower(!r) = mid(!r);  # move the lower bracket up to the midpoint
155 endfor
156 m = ceil (lower);
157 m(x > binocdf (m(:), n, p)) += 1; # fix off-by-one errors from binary search
86 endfunction 158 endfunction
87 159
88 160
89 %!shared x 161 %!shared x
90 %! x = [-1 0 0.5 1 2]; 162 %! x = [-1 0 0.5 1 2];
99 %!assert (binoinv ([x, NaN], 2, 0.5), [NaN 0 1 2 NaN NaN]) 171 %!assert (binoinv ([x, NaN], 2, 0.5), [NaN 0 1 2 NaN NaN])
100 %!assert (binoinv (single ([x, NaN]), 2, 0.5), single ([NaN 0 1 2 NaN NaN])) 172 %!assert (binoinv (single ([x, NaN]), 2, 0.5), single ([NaN 0 1 2 NaN NaN]))
101 %!assert (binoinv ([x, NaN], single (2), 0.5), single ([NaN 0 1 2 NaN NaN])) 173 %!assert (binoinv ([x, NaN], single (2), 0.5), single ([NaN 0 1 2 NaN NaN]))
102 %!assert (binoinv ([x, NaN], 2, single (0.5)), single ([NaN 0 1 2 NaN NaN])) 174 %!assert (binoinv ([x, NaN], 2, single (0.5)), single ([NaN 0 1 2 NaN NaN]))
103 175
176 ## Test accuracy, to within +/- 1 since it is a discrete distribution
177 %!shared y, tol
178 %! y = magic (3) + 1;
179 %! tol = 1;
180 %!assert (binoinv (binocdf (1:10, 11, 0.1), 11, 0.1), 1:10, tol)
181 %!assert (binoinv (binocdf (1:10, 2*(1:10), 0.1), 2*(1:10), 0.1), 1:10, tol)
182 %!assert (binoinv (binocdf (y, 2*y, 1./y), 2*y, 1./y), y, tol)
183
104 ## Test input validation 184 ## Test input validation
105 %!error binoinv () 185 %!error binoinv ()
106 %!error binoinv (1) 186 %!error binoinv (1)
107 %!error binoinv (1,2) 187 %!error binoinv (1,2)
108 %!error binoinv (1,2,3,4) 188 %!error binoinv (1,2,3,4)