changeset 17581:ab75e72c5b36

skewness.m: Improve compatibility with Matlab's skewness function * skewness.m: Add a 'flag' input argument to select either the sample skewness (default) or its "bias corrected" version. Return NaN (instead of 0) when the standard deviation is zero. Fix documentation. Fix old tests and add some new ones.
author Julien Bect <julien.bect@supelec.fr>
date Tue, 01 Oct 2013 21:51:17 +0200
parents 3ef7d28833f3
children 7004c733412f
files scripts/statistics/base/skewness.m
diffstat 1 files changed, 84 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/statistics/base/skewness.m	Sun Oct 06 19:08:20 2013 +0200
+++ b/scripts/statistics/base/skewness.m	Tue Oct 01 21:51:17 2013 +0200
@@ -1,3 +1,4 @@
+## Copyright (C) 2013 Julien Bect
 ## Copyright (C) 1996-2012 John W. Eaton
 ##
 ## This file is part of Octave.
@@ -18,26 +19,55 @@
 
 ## -*- texinfo -*-
 ## @deftypefn  {Function File} {} skewness (@var{x})
-## @deftypefnx {Function File} {} skewness (@var{x}, @var{dim})
-## Compute the skewness of the elements of the vector @var{x}.
+## @deftypefnx {Function File} {} skewness (@var{x}, @var{flag})
+## @deftypefnx {Function File} {} skewness (@var{x}, @var{flag}, @var{dim})
+## Compute the sample skewness of the elements of @var{x}:
 ## @tex
 ## $$
-## {\rm skewness} (x) = {1\over N \sigma^3} \sum_{i=1}^N (x_i-\bar{x})^3
+## {\rm skewness} (@var{x}) = {{{1\over N}\,
+##          \sum_{i=1}^N (@var{x}_i - \bar{@var{x}})^3} \over \sigma^3},
 ## $$
-## where $\bar{x}$ is the mean value of $x$.
+## where $N$ is the length of @var{x}, $\bar{@var{x}}$ its mean and $\sigma$
+## its (uncorrected) standard deviation.
 ## @end tex
 ## @ifnottex
 ##
 ## @example
-## skewness (x) = 1/N std(x)^(-3) sum ((x - mean(x)).^3)
+##                mean ((@var{x} - mean (@var{x})).^3)
+## skewness (@var{X}) = ------------------------.
+##                     std (@var{x}, 1).^3
 ## @end example
 ##
 ## @end ifnottex
 ##
 ## @noindent
-## If @var{x} is a matrix, return the skewness along the
-## first non-singleton dimension of the matrix.  If the optional
+## The optional argument @var{flag} controls which normalization is used.
+## If @var{flag} is equal to 1 (default value, used when @var{flag} is omitted
+## or empty), return the sample skewness as defined above. If @var{flag} is
+## equal to 0, return the adjusted skewness coefficient instead:
+## @tex
+## $$
+## {\rm skewness} (@var{x}) = {{{N \over (N - 1) (N - 2)}\,
+##          \sum_{i=1}^N (@var{x}_i - \bar{@var{x}})^3} \over @var{s}^3},
+## $$
+## where @var{s} is the corrected standard deviation of @var{x}.
+## @end tex
+## @ifnottex
+##
+## @example
+##                         N^2        mean ((@var{x} - mean (@var{x})).^3)
+## skewness (@var{X}, 0) = -------------- * ------------------------.
+##                   (N - 1)(N - 2)        std (@var{x}, 0).^3
+## @end example
+##
+## @end ifnottex
+## The adjusted skewness coefficient is obtained by replacing the sample second
+## and third central moments by their bias-corrected versions.
+##
+## If @var{x} is a matrix, or more generally a multidimensional array, return
+## the skewness along the first non-singleton dimension.  If the optional
 ## @var{dim} argument is given, operate along this dimension.
+##
 ## @seealso{var, kurtosis, moment}
 ## @end deftypefn
 
@@ -45,57 +75,84 @@
 ## Created: 29 July 1994
 ## Adapted-By: jwe
 
-function retval = skewness (x, dim)
+function y = skewness (x, flag, dim)
 
-  if (nargin != 1 && nargin != 2)
+  if (nargin < 1) || (nargin > 3)
     print_usage ();
   endif
 
-  if (! (isnumeric (x) || islogical (x)))
+  if (! isnumeric (x)) || islogical (x)
     error ("skewness: X must be a numeric vector or matrix");
   endif
 
+  if (nargin < 2) || isempty (flag)
+    flag = 1;  # default: do not use the "bias corrected" version
+  else
+    flag = double (flag);
+    if (~ isequal (flag, 0)) && (~ isequal (flag, 1))
+      error ("skewness: FLAG must be 0 or 1");
+    end
+  endif
+
   nd = ndims (x);
   sz = size (x);
-  if (nargin != 2)
+  if nargin < 3
     ## Find the first non-singleton dimension.
     (dim = find (sz > 1, 1)) || (dim = 1);
   else
-    if (!(isscalar (dim) && dim == fix (dim))
-        || !(1 <= dim && dim <= nd))
+    if (!(isscalar (dim) && dim == fix (dim)) || !(1 <= dim && dim <= nd))
       error ("skewness: DIM must be an integer and a valid dimension");
     endif
   endif
 
   n = sz(dim);
   sz(dim) = 1;
-  x = center (x, dim);  # center also promotes integer to double for next line
-  retval = zeros (sz, class (x));
-  s = std (x, [], dim);
-  idx = find (s > 0);
-  x = sum (x .^ 3, dim);
-  retval(idx) = x(idx) ./ (n * s(idx) .^ 3);
+
+  ## In the following sequence of computations, if x is of class single,
+  ## then the result y is also of class single
+  x = center (x, dim);
+  s = std (x, flag, dim);  # use the biased estimator of the variance
+  y = sum (x .^ 3, dim);
+  y = y ./ (n * s .^ 3);
+  y(s <= 0) = nan;
+
+  ## Apply bias correction to the third central sample moment
+  if flag == 0
+    if n > 2
+      y = y * (n * n ) / ((n - 1) * (n - 2));
+    else
+      y(:) = nan;
+    end
+  endif
 
 endfunction
 
 
-%!assert (skewness ([-1,0,1]), 0)
-%!assert (skewness ([-2,0,1]) < 0)
-%!assert (skewness ([-1,0,2]) > 0)
-%!assert (skewness ([-3,0,1]) == -1*skewness ([-1,0,3]))
+%!assert (skewness ([-1, 0, 1]), 0)
+%!assert (skewness ([-2, 0, 1]) < 0)
+%!assert (skewness ([-1, 0, 2]) > 0)
+%!assert (skewness ([-3, 0, 1]) == -1 * skewness ([-1, 0, 3]))
+%!assert (skewness (ones (3, 5)), nan (1, 5))
+
 %!test
 %! x = [0; 0; 0; 1];
 %! y = [x, 2*x];
-%! assert(all (abs (skewness (y) - [0.75, 0.75]) < sqrt (eps)));
+%! assert (skewness (y),  1.154700538379251 * [1 1], 1e-13)
+
+%!assert (skewness ([1:5 10; 1:5 10],  0, 2), 1.439590274527954 * [1; 1], 1e-15)
+%!assert (skewness ([1:5 10; 1:5 10],  1, 2), 1.051328089232020 * [1; 1], 1e-15)
+%!assert (skewness ([1:5 10; 1:5 10], [], 2), 1.051328089232020 * [1; 1], 1e-15)
 
-%!assert (skewness (single (1)), single (0))
+## Test behaviour on single input
+%! assert (skewness (single ([1:5 10])), single (1.0513283))
+%! assert (skewness (single ([1 2]), 0), single (nan))
 
-%% Test input validation
+## Test input validation
 %!error skewness ()
 %!error skewness (1, 2, 3)
 %!error skewness (['A'; 'B'])
 %!error skewness (1, ones (2,2))
 %!error skewness (1, 1.5)
-%!error skewness (1, 0)
+%!error skewness (1, [], 0)
 %!error skewness (1, 3)