Mercurial > forge
changeset 11343:c9f926dcc22b octave-forge
more accurate computation of log likelihood for small k in gevlike
author | nir-krakauer |
---|---|
date | Tue, 01 Jan 2013 22:40:36 +0000 |
parents | ac3b148b09c6 |
children | 4ebe6ca89b4f |
files | main/statistics/inst/gevlike.m |
diffstat | 1 files changed, 154 insertions(+), 79 deletions(-) [+] |
line wrap: on
line diff
--- a/main/statistics/inst/gevlike.m Mon Dec 31 17:36:30 2012 +0000 +++ b/main/statistics/inst/gevlike.m Tue Jan 01 22:40:36 2013 +0000 @@ -77,186 +77,255 @@ #calculate negative log likelihood if isargout(1) - nlogL = sum(gevnll (data, k, sigma, mu) (:)); + [nll, k_terms] = gevnll (data, k, sigma, mu); + nlogL = sum(nll(:)); endif #optionally calculate the first and second derivatives of the negative log likelihood with respect to parameters if isargout(2) - Grad = gevgrad (data, k, sigma, mu); + [Grad, kk_terms] = gevgrad (data, k, sigma, mu, k_terms); endif if isargout(3) - ACOV = gevfim (data, k, sigma, mu); + ACOV = gevfim (data, k, sigma, mu, k_terms, kk_terms); endif endfunction -function nlogL = gevnll (x, k, sigma, mu) +function [nlogL, k_terms] = gevnll (x, k, sigma, mu) #internal function to calculate negative log likelihood for gevlike #no input checking done - if k == 0 - z = (mu - x) ./ sigma; - nlogL = exp(z) - z + log(sigma); + k_terms = []; + a = (x - mu) ./ sigma; + + if all(k == 0) + nlogL = exp(-a) + a + log(sigma); else - z = 1 + k .* (x - mu) ./ sigma; - nlogL = z .^ (-1 ./ k) + (1 + 1 ./ k) .* log(z) + log(sigma); - nlogL(z <= 0) = Inf; + aa = k .* a; + if min(abs(aa)) < 1E-4 && max(abs(aa)) < 0.5 #use a series expansion to find the log likelihood more accurately when k is small + k_terms = 1; sgn = 1; i = 0; + while 1 + sgn = -sgn; i++; + newterm = sgn * (aa .^ i) / (i + 1); + k_terms = k_terms + newterm; + if max(abs(newterm)) <= eps + break + endif + endwhile + nlogL = exp(-a .* k_terms) + a .* (k + 1) .* k_terms + log(sigma); + else + b = 1 + aa; + nlogL = b .^ (-1 ./ k) + (1 + 1 ./ k) .* log(b) + log(sigma); + nlogL(b <= 0) = Inf; + endif endif endfunction -function G = gevgrad (x, k, sigma, mu) +function [G, kk_terms] = gevgrad (x, k, sigma, mu, k_terms) #calculate the gradient of the negative log likelihood of data x with respect to the parameters of the generalized extreme value distribution for gevlike #no input checking done 
+kk_terms = []; + G = ones(3, 1); if k == 0 ##use the expressions for first derivatives that are the limits as k --> 0 a = (x - mu) ./ sigma; - f = exp(a); + f = exp(-a) - 1; #k - g = -(2 * x .* (mu .* (1 - f) - sigma .* f) + 2 .* sigma .* mu .* f + (x.^2 + mu.^2).*(f - 1)) ./ (2 * f .* sigma .^ 2); + #g = -(2 * x .* (mu .* (1 - f) - sigma .* f) + 2 .* sigma .* mu .* f + (x.^2 + mu.^2).*(f - 1)) ./ (2 * f .* sigma .^ 2); + g = a .* (1 + a .* f / 2); + G(1) = sum(g(:)); #sigma - g = ((a ./ f) - a + 1) ./ sigma; + g = (a .* f + 1) ./ sigma; G(2) = sum(g(:)); #mu - g = (1 ./ f - 1) ./ sigma; + g = f ./ sigma; G(3) = sum(g(:)); return endif -z = 1 + k .* (x - mu) ./ sigma; -if any (z <= 0) +a = (x - mu) ./ sigma; +b = 1 + k .* a; +if any (b <= 0) G(:) = 0; #negative log likelihood is locally infinite return -end +endif #k -a = (x - mu) ./ sigma; -b = k .* a + 1; c = log(b); d = 1 ./ k + 1; -g = (c ./ k - a ./ b) ./ (k .* b .^ (1/k)) - c ./ (k .^ 2) + a .* d ./ b; +if nargin > 4 && ~isempty(k_terms) #use a series expansion to find the gradient more accurately when k is small + aa = k .* a; + f = exp(-a .* k_terms); + kk_terms = 0.5; sgn = 1; i = 0; + while 1 + sgn = -sgn; i++; + newterm = sgn * (aa .^ i) * ((i + 1) / (i + 2)); + kk_terms = kk_terms + newterm; + if max(abs(newterm)) <= eps + break + endif + endwhile + g = a .* ((a .* kk_terms) .* (f - 1 - k) + k_terms); +else + g = (c ./ k - a ./ b) ./ (k .* b .^ (1/k)) - c ./ (k .^ 2) + a .* d ./ b; +endif +%keyboard G(1) = sum(g(:)); #sigma -g = (a .* b .^ (-d) - d .* k .* a ./ b + 1) ./ sigma; +if nargin > 4 && ~isempty(k_terms) #use a series expansion to find the gradient more accurately when k is small + g = (1 - a .* (a .* k .* kk_terms - k_terms) .* (f - k - 1)) ./ sigma; +else + #g = (a .* b .^ (-d) - d .* k .* a ./ b + 1) ./ sigma; + g = (a .* b .^ (-d) - (k + 1) .* a ./ b + 1) ./ sigma; +endif G(2) = sum(g(:)); #mu -g = (b .^ (-d) - d .* k ./ b) ./ sigma; +if nargin > 4 && ~isempty(k_terms) #use a series 
expansion to find the gradient more accurately when k is small + g = -(a .* k .* kk_terms - k_terms) .* (f - k - 1) ./ sigma; +else + #g = (b .^ (-d) - d .* k ./ b) ./ sigma; + g = (b .^ (-d) - (k + 1) ./ b) ./ sigma; +end G(3) = sum(g(:)); - endfunction -function C = gevfim (x, k, sigma, mu) -#internal function to calculate the (negative) Fisher information matrix for gevlike +function ACOV = gevfim (x, k, sigma, mu, k_terms, kk_terms) +#internal function to calculate the Fisher information matrix for gevlike #no input checking done -#calculate the various second derivatives (used Maxima to find the expressions) +#find the various second derivatives (used Maxima to help find the expressions) -C = ones(3); +ACOV = ones(3); if k == 0 ##use the expressions for second derivatives that are the limits as k --> 0 #k, k a = (x - mu) ./ sigma; - f = exp(a); - der = (x .* (24 * mu .^ 2 .* sigma .* (f - 1) + 24 * mu .* sigma .^ 2 .* f - 12 * mu .^ 3) + x .^ 3 .* (8 * sigma .* (f - 1) - 12*mu) + x .^ 2 .* (-12 * sigma .^ 2 .* f + 24 * mu .* sigma .* (1 - f) + 18 * mu .^ 2) - 12 * mu .^ 2 .* sigma .^ 2 .* f + 8 * mu .^ 3 .* sigma .* (1 - f) + 3 * (x .^ 4 + mu .^ 4)) ./ (12 .* f .* sigma .^ 4); - C(1, 1) = sum(der(:)); + f = exp(-a); + #der = (x .* (24 * mu .^ 2 .* sigma .* (f - 1) + 24 * mu .* sigma .^ 2 .* f - 12 * mu .^ 3) + x .^ 3 .* (8 * sigma .* (f - 1) - 12*mu) + x .^ 2 .* (-12 * sigma .^ 2 .* f + 24 * mu .* sigma .* (1 - f) + 18 * mu .^ 2) - 12 * mu .^ 2 .* sigma .^ 2 .* f + 8 * mu .^ 3 .* sigma .* (1 - f) + 3 * (x .^ 4 + mu .^ 4)) ./ (12 .* f .* sigma .^ 4); + der = (a .^ 2) .* (a .* (a/4 - 2/3) .* f + 2/3 * a - 1); + ACOV(1, 1) = sum(der(:)); #sigma, sigma - der = (sigma .^ -2) .* ... - ((-2*a ./ f) + ... - a .^ 2 ./ f + ... 
- 2*a - 1); - C(2, 2) = sum(der(:)); + der = (sigma .^ -2) .* (a .* ((a - 2) .* f + 2) - 1); + ACOV(2, 2) = sum(der(:)); #mu, mu - der = (sigma .^ -2) ./ f; - C(3, 3) = sum(der(:)); + der = (sigma .^ -2) .* f; + ACOV(3, 3) = sum(der(:)); #k, sigma - der = (x .^2 .* (2*sigma .* (f - 1) - 3*mu) + ... - x .* (-2 * sigma .^ 2 .* f + 4 * mu .* sigma .* (1 - f) + 3 .* mu .^ 2) + ... - 2 * mu .^ 2 .* sigma .* (f - 1) + 2 * mu * sigma .^ 2 * f + x .^ 3 - mu .^ 3) ... - ./ (2 .* f .* sigma .^ 4); - C(1, 2) = C(2, 1) = sum(der(:)); + #der = (x .^2 .* (2*sigma .* (f - 1) - 3*mu) + x .* (-2 * sigma .^ 2 .* f + 4 * mu .* sigma .* (1 - f) + 3 .* mu .^ 2) + 2 * mu .^ 2 .* sigma .* (f - 1) + 2 * mu * sigma .^ 2 * f + x .^ 3 - mu .^ 3) ./ (2 .* f .* sigma .^ 4); + der = (-a ./ sigma) .* (a .* (1 - a/2) .* f - a + 1); + ACOV(1, 2) = ACOV(2, 1) = sum(der(:)); #k, mu - der = (x .* (2*sigma .* (f - 1) - 2*mu) - 2 * f .* sigma .^ 2 + ... - 2 .* mu .* sigma .* (1 - f) + x .^ 2 + mu .^ 2)... - ./ (2 .* f .* sigma .^ 3); - C(1, 3) = C(3, 1) = sum(der(:)); + #der = (x .* (2*sigma .* (f - 1) - 2*mu) - 2 * f .* sigma .^ 2 + 2 .* mu .* sigma .* (1 - f) + x .^ 2 + mu .^ 2)./ (2 .* f .* sigma .^ 3); + der = (-1 ./ sigma) .* (a .* (1 - a/2) .* f - a + 1); + ACOV(1, 3) = ACOV(3, 1) = sum(der(:)); #sigma, mu - der = (sigma + (x - sigma - mu) ./ f) ./ (sigma .^ 3); - C(2, 3) = C(3, 2) = sum(der(:)); + der = (1 + (a - 1) .* f) ./ (sigma .^ 2); + ACOV(2, 3) = ACOV(3, 2) = sum(der(:)); return endif - #general case z = 1 + k .* (x - mu) ./ sigma; -if any (z <= 0) - C(:) = 0; #negative log likelihood is locally infinite - return -end - #k, k a = (x - mu) ./ sigma; b = k .* a + 1; c = log(b); d = 1 ./ k + 1; -der = ((((c ./ k.^2) - (a ./ (k .* b))) .^ 2) ./ (b .^ (1 ./ k))) + ... 
+if nargin > 5 && ~isempty(kk_terms) #use a series expansion to find the derivatives more accurately when k is small + aa = k .* a; + f = exp(-a .* k_terms); + kkk_terms = 2/3; sgn = 1; i = 0; + while 1 + sgn = -sgn; i++; + newterm = sgn * (aa .^ i) * ((i + 1) * (i + 2) / (i + 3)); + kkk_terms = kkk_terms + newterm; + if max(abs(newterm)) <= eps + break + endif + endwhile + der = (a .^ 2) .* (a .* (a .* kk_terms .^ 2 - kkk_terms) .* f + a .* (1 + k) .* kkk_terms - 2 * kk_terms); +else + der = ((((c ./ k.^2) - (a ./ (k .* b))) .^ 2) ./ (b .^ (1 ./ k))) + ... ((-2*c ./ k.^3) + (2*a ./ (k.^2 .* b)) + ((a ./ b) .^ 2 ./ k)) ./ (b .^ (1 ./ k)) + ... 2*c ./ k.^3 - ... (2*a ./ (k.^2 .* b)) - (d .* (a ./ b) .^ 2); -C(1, 1) = sum(der(:)); +endif +der(z <= 0) = 0; %no probability mass in this region +ACOV(1, 1) = sum(der(:)); #sigma, sigma -der = (sigma .^ -2) .* (... - -2*a .* b .^ (-d) + ... - d .* k .* a .^ 2 .* (b .^ (-d-1)) + ... - 2 .* d .* k .* a ./ b - ... - d .* (k .* a ./ b) .^ 2 - 1); -C(2, 2) = sum(der(:)); +if nargin > 5 && ~isempty(kk_terms) #use a series expansion to find the derivatives more accurately when k is small + der = ((-2*a .* k_terms + 4 * a .^ 2 .* k .* kk_terms - a .^ 3 .* (k .^ 2) .* kkk_terms) .* (f - k - 1) + f .* ((a .* (k_terms - a .* k .* kk_terms)) .^ 2) - 1) ./ (sigma .^ 2); +else + der = (sigma .^ -2) .* (... + -2*a .* b .^ (-d) + ... + d .* k .* a .^ 2 .* (b .^ (-d-1)) + ... + 2 .* d .* k .* a ./ b - ... + d .* (k .* a ./ b) .^ 2 - 1); +end +der(z <= 0) = 0; %no probability mass in this region +ACOV(2, 2) = sum(der(:)); #mu, mu -der = (d .* (sigma .^ -2)) .* (... +if nargin > 5 && ~isempty(kk_terms) #use a series expansion to find the derivatives involving k more accurately when k is small + der = (f .* (a .* k .* kk_terms - k_terms) .^ 2 - a .* k .^ 2 .* kkk_terms .* (f - k - 1)) ./ (sigma .^ 2); +else + der = (d .* (sigma .^ -2)) .* (... k .* (b .^ (-d-1)) - ... 
(k ./ b) .^ 2); -C(3, 3) = sum(der(:)); +endif +der(z <= 0) = 0; %no probability mass in this region +ACOV(3, 3) = sum(der(:)); -#k, sigma -der = (a ./ sigma) .* (... -(b .^ (-d)) .* (c ./ k - a ./ b) ./ k + ... --a .* (b .^ (-d-1)) + ... -((1 ./ k) - d) ./ b + ... -a .* k .* d ./ (b .^ 2)); -C(1, 2) = C(2, 1) = sum(der(:)); #k, mu -der = ( (b .^ (-d)) .* (c ./ k - a ./ b) ./ k - ... +if nargin > 5 && ~isempty(kk_terms) #use a series expansion to find the derivatives involving k more accurately when k is small + der = 2 * a .* kk_terms .* (f - 1 - k) - a .^ 2 .* k_terms .* kk_terms .* f + k_terms; #k, a second derivative + der = -der ./ sigma; +else + der = ( (b .^ (-d)) .* (c ./ k - a ./ b) ./ k - ... a .* (b .^ (-d-1)) + ... ((1 ./ k) - d) ./ b + a .* k .* d ./ (b .^ 2)) ./ sigma; -C(1, 3) = C(3, 1) = sum(der(:)); +endif +der(z <= 0) = 0; %no probability mass in this region +ACOV(1, 3) = ACOV(3, 1) = sum(der(:)); + +#k, sigma +der = a .* der; +der(z <= 0) = 0; %no probability mass in this region +ACOV(1, 2) = ACOV(2, 1) = sum(der(:)); #sigma, mu -der = ( -(b .^ (-d)) + ... +if nargin > 5 && ~isempty(kk_terms) #use a series expansion to find the derivatives involving k more accurately when k is small + der = ((-k_terms + 3 * a .* k .* kk_terms - (a .* k) .^ 2 .* kkk_terms) .* (f - k - 1) + a .* (k_terms - a .* k .* kk_terms) .^ 2 .* f) ./ (sigma .^ 2); +else + der = ( -(b .^ (-d)) + ... a .* k .* d .* (b .^ (-d-1)) + ... (d .* k ./ b) - a .* (k./b).^2 .* d) ./ (sigma .^ 2); -C(2, 3) = C(3, 2) = sum(der(:)); +end +der(z <= 0) = 0; %no probability mass in this region +ACOV(2, 3) = ACOV(3, 2) = sum(der(:)); endfunction @@ -268,10 +337,12 @@ %! k = 0.2; %! sigma = 0.3; %! mu = 0.5; -%! [L, ~, C] = gevlike ([k sigma mu], x); +%! [L, D, C] = gevlike ([k sigma mu], x); %! expected_L = 0.75942; +%! expected_D = [0.53150; -0.67790; -2.40674]; %! expected_C = [-0.12547 1.77884 1.06731; 1.77884 16.40761 8.48877; 1.06731 8.48877 0.27979]; %! assert (L, expected_L, 0.001); +%! 
assert (D, expected_D, 0.001); %! assert (C, expected_C, 0.001); %!test @@ -279,10 +350,12 @@ %! k = 0; %! sigma = 0.3; %! mu = 0.5; -%! [L, ~, C] = gevlike ([k sigma mu], x); +%! [L, D, C] = gevlike ([k sigma mu], x); %! expected_L = 0.65157; +%! expected_D = [0.54011; -1.17291; -2.70375]; %! expected_C = [0.090036 3.41229 2.047337; 3.412229 24.760027 12.510190; 2.047337 12.510190 2.098618]; %! assert (L, expected_L, 0.001); +%! assert (D, expected_D, 0.001); %! assert (C, expected_C, 0.001); %!test @@ -290,8 +363,10 @@ %! k = -0.2; %! sigma = 0.3; %! mu = 0.5; -%! [L, ~, C] = gevlike ([k sigma mu], x); +%! [L, D, C] = gevlike ([k sigma mu], x); %! expected_L = 3786.4; +%! expected_D = [6.4511e+04; -4.8194e+04; 3.0633e+03]; %! expected_C = -[-1.4937e+06 1.0083e+06 -6.1837e+04; 1.0083e+06 -8.1138e+05 4.0917e+04; -6.1837e+04 4.0917e+04 -2.0422e+03]; %! assert (L, expected_L, -0.001); +%! assert (D, expected_D, -0.001); %! assert (C, expected_C, -0.001);