Mercurial > octave-nkf
comparison scripts/statistics/distributions/nbininv.m @ 20643:d6d04088ac9e
nbininv.m: Increase speed (85X) and accuracy of function (bug #34363).
* nbininv.m: Call new function scalar_nbininv to calculate nbininv for scalar.
If there are still uncalculated values then call bin_search_nbininv. Call
bin_search_nbininv directly for vectors. Add more BIST tests.
* nbininv.m (scalar_binoinv): New subfunction to calculate nbininv for scalar x.
Stops when x > 1000.
* nbininv.m (bin_search_nbininv): New subfunction to do binary search for nbininv.
author | Lachlan Andrew <lachlanbis@gmail.com> |
---|---|
date | Sun, 11 Oct 2015 20:33:37 -0700 |
parents | d9341b422488 |
children |
comparison
equal
deleted
inserted
replaced
20642:9d2023d1a63c | 20643:d6d04088ac9e |
---|---|
1 ## Copyright (C) 2012 Rik Wehbring | 1 ## Copyright (C) 2015 Lachlan Andrew |
2 ## Copyright (C) 1995-2015 Kurt Hornik | 2 ## Copyright (C) 2012-2015 Rik Wehbring |
3 ## Copyright (C) 1995-2012 Kurt Hornik | |
3 ## | 4 ## |
4 ## This file is part of Octave. | 5 ## This file is part of Octave. |
5 ## | 6 ## |
6 ## Octave is free software; you can redistribute it and/or modify it | 7 ## Octave is free software; you can redistribute it and/or modify it |
7 ## under the terms of the GNU General Public License as published by | 8 ## under the terms of the GNU General Public License as published by |
27 ## When @var{n} is extended to real numbers this is the Polya distribution. | 28 ## When @var{n} is extended to real numbers this is the Polya distribution. |
28 ## | 29 ## |
29 ## The number of failures in a Bernoulli experiment with success probability | 30 ## The number of failures in a Bernoulli experiment with success probability |
30 ## @var{p} before the @var{n}-th success follows this distribution. | 31 ## @var{p} before the @var{n}-th success follows this distribution. |
31 ## @end deftypefn | 32 ## @end deftypefn |
32 | |
33 ## Author: KH <Kurt.Hornik@wu-wien.ac.at> | |
34 ## Description: Quantile function of the Pascal distribution | |
35 | 33 |
36 function inv = nbininv (x, n, p) | 34 function inv = nbininv (x, n, p) |
37 | 35 |
38 if (nargin != 3) | 36 if (nargin != 3) |
39 print_usage (); | 37 print_usage (); |
63 k = (x == 1) & (n > 0) & (n < Inf) & (p >= 0) & (p <= 1); | 61 k = (x == 1) & (n > 0) & (n < Inf) & (p >= 0) & (p <= 1); |
64 inv(k) = Inf; | 62 inv(k) = Inf; |
65 | 63 |
66 k = find ((x >= 0) & (x < 1) & (n > 0) & (n < Inf) | 64 k = find ((x >= 0) & (x < 1) & (n > 0) & (n < Inf) |
67 & (p > 0) & (p <= 1)); | 65 & (p > 0) & (p <= 1)); |
68 m = zeros (size (k)); | 66 if (! isempty (k)) |
69 x = x(k); | 67 x = x(k); |
70 if (isscalar (n) && isscalar (p)) | 68 m = zeros (size (k)); |
71 s = p ^ n * ones (size (k)); | 69 if (isscalar (n) && isscalar (p)) |
72 while (1) | 70 [m, unfinished] = scalar_nbininv (x(:), n, p); |
73 l = find (s < x); | 71 m(unfinished) = bin_search_nbininv (x(unfinished), n, p); |
74 if (any (l)) | 72 else |
75 m(l) = m(l) + 1; | 73 m = bin_search_nbininv (x, n(k), p(k)); |
76 s(l) = s(l) + nbinpdf (m(l), n, p); | 74 endif |
77 else | 75 inv(k) = m; |
78 break; | |
79 endif | |
80 endwhile | |
81 else | |
82 n = n(k); | |
83 p = p(k); | |
84 s = p .^ n; | |
85 while (1) | |
86 l = find (s < x); | |
87 if (any (l)) | |
88 m(l) = m(l) + 1; | |
89 s(l) = s(l) + nbinpdf (m(l), n(l), p(l)); | |
90 else | |
91 break; | |
92 endif | |
93 endwhile | |
94 endif | 76 endif |
95 inv(k) = m; | 77 |
78 endfunction | |
79 | |
80 | |
81 ## Core algorithm to calculate the inverse negative binomial, for n and p real | |
82 ## scalars and y a column vector, and for which the output is not NaN or Inf. | |
83 ## Compute CDF in batches of doubling size until CDF > x, or answer > 500. | |
84 ## Return the locations of unfinished cases in k. | |
85 function [m, k] = scalar_nbininv (x, n, p) | |
86 k = 1:length (x); | |
87 m = zeros (size (x)); | |
88 prev_limit = 0; | |
89 limit = 10; | |
90 do | |
91 cdf = nbincdf (prev_limit:limit, n, p); | |
92 r = bsxfun (@le, x(k), cdf); | |
93 [v, m(k)] = max (r, [], 2); # find first instance of x <= cdf | |
94 m(k) += prev_limit - 1; | |
95 k = k(v == 0); | |
96 | |
97 prev_limit = limit; | |
98 limit += limit; | |
99 until (isempty (k) || limit >= 1000) | |
100 | |
101 endfunction | |
102 | |
103 ## Vectorized binary search. | |
104 ## Can handle vectors n and p, and is faster than the scalar case when the | |
105 ## answer is large. | |
106 ## Could be optimized to call nbincdf only for a subset of the x at each stage, | |
107 ## but care must be taken to handle both scalar and vector n,p. Bookkeeping | |
108 ## may cost more than the extra computations. | |
109 function m = bin_search_nbininv (x, n, p) | |
110 k = 1:length (x); | |
111 lower = zeros (size (x)); | |
112 limit = 1; | |
113 while (any (k) && limit < 1e100) | |
114 cdf = nbincdf (limit, n, p); | |
115 k = (x > cdf); | |
116 lower(k) = limit; | |
117 limit += limit; | |
118 end | |
119 upper = max (2*lower, 1); | |
120 k = find (lower != limit/2); # elements for which above loop finished | |
121 for i = 1:ceil (log2 (max (lower))) | |
122 mid = (upper + lower)/2; | |
123 cdf = nbincdf (floor (mid), n, p); | |
124 r = (x <= cdf); | |
125 upper(r) = mid(r); | |
126 lower(!r) = mid(!r); | |
127 endfor | |
128 m = ceil (lower); | |
129 m(x > nbincdf (m, n, p)) += 1; # fix off-by-one errors from binary search | |
96 | 130 |
97 endfunction | 131 endfunction |
98 | 132 |
99 | 133 |
100 %!shared x | 134 %!shared x |
111 %!assert (nbininv ([x, NaN], 1, 0.5), [NaN 0 1 Inf NaN NaN]) | 145 %!assert (nbininv ([x, NaN], 1, 0.5), [NaN 0 1 Inf NaN NaN]) |
112 %!assert (nbininv (single ([x, NaN]), 1, 0.5), single ([NaN 0 1 Inf NaN NaN])) | 146 %!assert (nbininv (single ([x, NaN]), 1, 0.5), single ([NaN 0 1 Inf NaN NaN])) |
113 %!assert (nbininv ([x, NaN], single (1), 0.5), single ([NaN 0 1 Inf NaN NaN])) | 147 %!assert (nbininv ([x, NaN], single (1), 0.5), single ([NaN 0 1 Inf NaN NaN])) |
114 %!assert (nbininv ([x, NaN], 1, single (0.5)), single ([NaN 0 1 Inf NaN NaN])) | 148 %!assert (nbininv ([x, NaN], 1, single (0.5)), single ([NaN 0 1 Inf NaN NaN])) |
115 | 149 |
150 ## Test accuracy, to within +/- 1 since it is a discrete distribution | |
151 %!shared y, tol | |
152 %! y = magic (3) + 1; | |
153 %! tol = 1; | |
154 %!assert (nbininv (nbincdf (1:10, 3, 0.1), 3, 0.1), 1:10, tol) | |
155 %!assert (nbininv (nbincdf (1:10, 3./(1:10), 0.1), 3./(1:10), 0.1), 1:10, tol) | |
156 %!assert (nbininv (nbincdf (y, 3./y, 1./y), 3./y, 1./y), y, tol) | |
157 | |
116 ## Test input validation | 158 ## Test input validation |
117 %!error nbininv () | 159 %!error nbininv () |
118 %!error nbininv (1) | 160 %!error nbininv (1) |
119 %!error nbininv (1,2) | 161 %!error nbininv (1,2) |
120 %!error nbininv (1,2,3,4) | 162 %!error nbininv (1,2,3,4) |