changeset 17697:f0e777cf348f

kurtosis.m: Improve compatibility with Matlab's kurtosis function * kurtosis.m: Change the definition of the kurtosis (do not substract 3). Add a 'flag' input argument to select either the sample kurtosis (default) or its "bias corrected" version. Return NaN (instead of 0) when the standard deviation is zero. Update documentation. Fix old tests and add some new ones.
author Julien Bect <julien.bect@supelec.fr>
date Fri, 18 Oct 2013 13:23:56 +0200
parents 92226d09f91a
children 9bb5d3f63cdd
files scripts/statistics/base/kurtosis.m
diffstat 1 files changed, 99 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/statistics/base/kurtosis.m	Sat Oct 19 06:36:41 2013 +0100
+++ b/scripts/statistics/base/kurtosis.m	Fri Oct 18 13:23:56 2013 +0200
@@ -1,3 +1,4 @@
+## Copyright (C) 2013 Julien Bect
 ## Copyright (C) 1996-2012 John W. Eaton
 ##
 ## This file is part of Octave.
@@ -18,33 +19,59 @@
 
 ## -*- texinfo -*-
 ## @deftypefn  {Function File} {} kurtosis (@var{x})
-## @deftypefnx {Function File} {} kurtosis (@var{x}, @var{dim})
-## Compute the kurtosis of the elements of the vector @var{x}.
+## @deftypefnx {Function File} {} kurtosis (@var{x}, @var{flag})
+## @deftypefnx {Function File} {} kurtosis (@var{x}, @var{flag}, @var{dim})
+## Compute the sample kurtosis of the elements of @var{x}:
 ## @tex
 ## $$
-##  {\rm kurtosis} (x) = {1\over N \sigma^4} \sum_{i=1}^N (x_i-\bar{x})^4 - 3
+## \kappa_1 = {{{1\over N}\,
+##          \sum_{i=1}^N (@var{x}_i - \bar{@var{x}})^4} \over \sigma^4},
 ## $$
-## where $\bar{x}$ is the mean value of $x$.
+## where $N$ is the length of @var{x}, $\bar{@var{x}}$ its mean and $\sigma$
+## its (uncorrected) standard deviation.
 ## @end tex
 ## @ifnottex
 ##
 ## @example
 ## @group
-##                 1    sum ((x - mean(x)).^4)
-## kurtosis (x) = --- * ----------------------  -  3
-##                 N           std(x)^4
+##      mean ((@var{x} - mean (@var{x})).^4)
+## k1 = ------------------------.
+##             std (@var{x}).^4
 ## @end group
 ## @end example
 ##
 ## @end ifnottex
-## If @var{x} is a matrix, return the kurtosis over the
-## first non-singleton dimension of the matrix.  If the optional
+##
+## @noindent
+## The optional argument @var{flag} controls which normalization is used.
+## If @var{flag} is equal to 1 (default value, used when @var{flag} is omitted
+## or empty), return the sample kurtosis as defined above.  If @var{flag} is
+## equal to 0, return the "bias-corrected" kurtosis coefficient instead:
+## @tex
+## $$
+## \kappa_0 = 3 + {\scriptstyle N - 1 \over \scriptstyle (N - 2)(N - 3)} \,
+##     \left( (N + 1)\, \kappa_1 - 3 (N - 1) \right).
+## $$
+## @end tex
+## @ifnottex
+##
+## @example
+## @group
+##               N - 1
+## k0 = 3 + -------------- * ((N + 1) * k1 - 3 * (N - 1))
+##          (N - 2)(N - 3)
+## @end group
+## @end example
+##
+## @end ifnottex
+## The bias-corrected kurtosis coefficient is obtained by replacing the sample
+## second and fourth central moments by their unbiased versions. It is an
+## unbiased estimate of the population kurtosis for normal populations.
+##
+## If @var{x} is a matrix, or more generally a multi-dimensional array, return
+## the kurtosis along the first non-singleton dimension.  If the optional
 ## @var{dim} argument is given, operate along this dimension.
 ##
-## Note: The definition of kurtosis above yields a kurtosis of zero for the
-## stdnormal distribution and is sometimes referred to as "excess kurtosis".
-## To calculate kurtosis without the normalization factor of @math{-3} use
-## @code{moment (@var{x}, 4, 'c') / std (@var{x})^4}.
 ## @seealso{var, skewness, moment}
 ## @end deftypefn
 
@@ -52,9 +79,9 @@
 ## Created: 29 July 1994
 ## Adapted-By: jwe
 
-function retval = kurtosis (x, dim)
+function y = kurtosis (x, flag, dim)
 
-  if (nargin != 1 && nargin != 2)
+  if (nargin < 1) || (nargin > 3)
     print_usage ();
   endif
 
@@ -62,26 +89,44 @@
     error ("kurtosis: X must be a numeric vector or matrix");
   endif
 
+  if (nargin < 2 || isempty (flag))
+    flag = 1;  # default: do not use the "bias corrected" version
+  else
+    if ((! isscalar (flag)) || (flag != 0 && flag != 1))
+      error ("kurtosis: FLAG must be 0 or 1");
+    endif
+  endif
+
   nd = ndims (x);
   sz = size (x);
-  if (nargin != 2)
+  if (nargin < 3)
     ## Find the first non-singleton dimension.
     (dim = find (sz > 1, 1)) || (dim = 1);
   else
-    if (!(isscalar (dim) && dim == fix (dim))
-        || !(1 <= dim && dim <= nd))
+    if (! (isscalar (dim) && dim == fix (dim)) || ! (1 <= dim && dim <= nd))
       error ("kurtosis: DIM must be an integer and a valid dimension");
     endif
   endif
 
   n = sz(dim);
   sz(dim) = 1;
-  x = center (x, dim);  # center also promotes integer to double for next line
-  retval = zeros (sz, class (x));
-  s = std (x, [], dim);
-  idx = find (s > 0);
-  x = sum (x.^4, dim);
-  retval(idx) = x(idx) ./ (n * s(idx) .^ 4) - 3;
+
+  x = center (x, dim);   # center also promotes integer, logical to double
+  v = var (x, 1, dim);   # normalize with 1/N
+  y = sum (x .^ 4, dim);
+  idx = (v != 0);
+  y(idx) = y(idx) ./ (n * v(idx) .^ 2);
+  y(! idx) = NaN;
+
+  ## Apply bias correction to the second and fourth central sample moment  
+  if (flag == 0)
+    if (n > 3)
+      C = (n - 1) / ((n - 2) * (n - 3));
+      y = 3 + C * ((n + 1) * y - 3 * (n - 1));
+    else
+      y(:) = NaN;
+    endif
+  endif
 
 endfunction
 
@@ -89,16 +134,38 @@
 %!test
 %! x = [-1; 0; 0; 0; 1];
 %! y = [x, 2*x];
-%! assert (kurtosis (y), [-1.4, -1.4], sqrt (eps));
+%! assert (kurtosis (y), [2.5, 2.5], sqrt (eps));
+
+%!assert (kurtosis ([-3, 0, 1]) == kurtosis ([-1, 0, 3]))
+%!assert (kurtosis (ones (3, 5)), NaN (1, 5))
+
+%!assert (kurtosis ([1:5 10; 1:5 10],  0, 2), 5.4377317925288901 * [1; 1], 8 * eps)
+%!assert (kurtosis ([1:5 10; 1:5 10],  1, 2), 2.9786509002956195 * [1; 1], 8 * eps)
+%!assert (kurtosis ([1:5 10; 1:5 10], [], 2), 2.9786509002956195 * [1; 1], 8 * eps)
 
-%!assert (kurtosis (single (1)), single (0))
+## Test behaviour on single input
+%!assert (kurtosis (single ([1:5 10])), single (2.9786513), eps ("single"))
+%!assert (kurtosis (single ([1 2]), 0), single (NaN))
+
+## Verify no "divide-by-zero" warnings
+%!test
+%! wstate = warning ("query", "Octave:divide-by-zero");
+%! warning ("on", "Octave:divide-by-zero");
+%! unwind_protect
+%!   lastwarn ("");  # clear last warning
+%!   kurtosis (1);
+%!   assert (lastwarn (), "");
+%! unwind_protect_cleanup
+%!   warning (wstate, "Octave:divide-by-zero");
+%! end_unwind_protect
 
 %% Test input validation
 %!error kurtosis ()
 %!error kurtosis (1, 2, 3)
-%!error kurtosis (['A'; 'B'])
-%!error kurtosis (1, ones (2,2))
-%!error kurtosis (1, 1.5)
-%!error kurtosis (1, 0)
-%!error kurtosis (1, 3)
-
+%!error <X must be a numeric vector or matrix> kurtosis (['A'; 'B'])
+%!error <FLAG must be 0 or 1> kurtosis (1, 2)
+%!error <FLAG must be 0 or 1> kurtosis (1, [1 0])
+%!error <DIM must be an integer> kurtosis (1, [], ones (2,2))
+%!error <DIM must be an integer> kurtosis (1, [], 1.5)
+%!error <DIM must be .* a valid dimension> kurtosis (1, [], 0)
+%!error <DIM must be .* a valid dimension> kurtosis (1, [], 3)