changeset 30410:bc0de453fb6a stable

extend var and std with dim options (bug #58116) and weights (patch #10103) * /scripts/statistics/var.m: adds scalar and vector weights, vector and "all" dim options, compatible empty input handling. Also updates BISTs and dosctring. * /scripts/statistics/std.m: simplifies std as simple sqrt(var(x)) wrapper, inheriting the extensions to var. Updates docstring and BISTs accordingly. * /NEWS: notes improved matlab compatibility
author Stefano Guidoni <ilguido@users.sf.net>
date Wed, 01 Dec 2021 12:42:08 -0500
parents 597275db9c7f
children 0fee9e910d84
files NEWS scripts/statistics/std.m scripts/statistics/var.m
diffstat 3 files changed, 240 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Tue Nov 30 11:04:27 2021 -0500
+++ b/NEWS	Wed Dec 01 12:42:08 2021 -0500
@@ -263,6 +263,10 @@
 - The function `repelem` now produces a row vector output when the input is
 a scalar.
 
+- The functions `var` and `std` now accept a weight vector as input and
+compute the weigthed variance.  Dimension input now allows a vector and
+the keyword "all".
+
 ### Alphabetical list of new functions added in Octave 7
 
 * `cospi`
--- a/scripts/statistics/std.m	Tue Nov 30 11:04:27 2021 -0500
+++ b/scripts/statistics/std.m	Wed Dec 01 12:42:08 2021 -0500
@@ -25,8 +25,9 @@
 
 ## -*- texinfo -*-
 ## @deftypefn  {} {} std (@var{x})
-## @deftypefnx {} {} std (@var{x}, @var{opt})
-## @deftypefnx {} {} std (@var{x}, @var{opt}, @var{dim})
+## @deftypefnx {} {} std (@var{x}, @var{w})
+## @deftypefnx {} {} std (@var{x}, @var{w}, @var{dim})
+## @deftypefnx {} {} std (@var{x}, @var{w}, @qcode{"ALL"})
 ## Compute the standard deviation of the elements of the vector @var{x}.
 ##
 ## The standard deviation is defined as
@@ -48,63 +49,47 @@
 ## where @math{N} is the number of elements of the @var{x} vector.
 ## @end ifnottex
 ##
-## If @var{x} is a matrix, compute the standard deviation for each column and
-## return them in a row vector.
+## If @var{x} is an array, compute the standard deviation for each column and
+## return them in a row vector (or for an n-D array, the result is returned as
+## an array of dimension 1 x n x m x @dots{}).
 ##
-## The argument @var{opt} determines the type of normalization to use.
-## Valid values are
+## The optional argument @var{w} determines the weighting scheme to use.  Valid
+## values are:
 ##
 ## @table @asis
-## @item 0:
-##   normalize with @math{N-1}, provides the square root of the best unbiased
-## estimator of the variance [default]
+## @item 0 [default]:
+## Normalize with @math{N-1}.  This provides the square root of the best
+## unbiased estimator of the variance.
 ##
 ## @item 1:
-##   normalize with @math{N}, this provides the square root of the second
-## moment around the mean
+## Normalize with @math{N}. This provides the square root of the second moment
+## around the mean.
+##
+## @item a vector:
+## Compute the weighted standard deviation with nonnegative scalar weights. The
+## length of @var{w} must be equal to the size of @var{x} along dimension
+## @var{dim}.
 ## @end table
 ##
-## If the optional argument @var{dim} is given, operate along this dimension.
+## If @math{N} is equal to 1 the value of @var{W} is ignored and
+## normalization by @math{N} is used.
+##
+## The optional variable @var{dim} forces @code{std} to operate over the
+## specified dimension.  @var{dim} can either be a scalar dimension or a vector
+## of non-repeating dimensions over which to operate.  Dimensions must be
+## positive integers, and the standard deviation is calculated over the array
+## slice defined by @var{dim}.
+##
+## Specifying dimension @qcode{"ALL"} will force @code{std} to operate on all
+## elements of @var{x}, and is equivalent to @code{std (@var{x}(:))}.
+##
+## When @var{dim} is a vector or @qcode{"ALL"}, @var{w} must be either 0 or 1.
 ## @seealso{var, bounds, mad, range, iqr, mean, median}
 ## @end deftypefn
 
-function retval = std (x, opt = 0, dim)
-
-  if (nargin < 1)
-    print_usage ();
-  endif
-
-  if (! (isnumeric (x) || islogical (x)))
-    error ("std: X must be a numeric vector or matrix");
-  endif
-
-  if (isempty (opt))
-    opt = 0;
-  elseif (! isscalar (opt) || (opt != 0 && opt != 1))
-    error ("std: normalization OPT must be 0 or 1");
-  endif
+function retval = std (varargin)
 
-  nd = ndims (x);
-  sz = size (x);
-  if (nargin < 3)
-    ## Find the first non-singleton dimension.
-    (dim = find (sz > 1, 1)) || (dim = 1);
-  else
-    if (! (isscalar (dim) && dim == fix (dim) && dim > 0))
-      error ("std: DIM must be an integer and a valid dimension");
-    endif
-  endif
-
-  n = size (x, dim);
-  if (n == 1 || isempty (x))
-    if (isa (x, "single"))
-      retval = zeros (sz, "single");
-    else
-      retval = zeros (sz);
-    endif
-  else
-    retval = sqrt (sumsq (center (x, dim), dim) / (n - 1 + opt));
-  endif
+  retval = sqrt (var (varargin{:}));
 
 endfunction
 
@@ -121,14 +106,33 @@
 %!assert (std ([1 2], 1), 0.5, 5*eps)
 %!assert (std (1), 0)
 %!assert (std (single (1)), single (0))
-%!assert (std ([]), [])
-%!assert (std (ones (1,3,0,2)), ones (1,3,0,2))
 %!assert (std ([1 2 3], [], 3), [0 0 0])
 
+##Test empty inputs
+%!assert (std ([]), NaN)
+%!assert (std ([],[],1), NaN(1,0))
+%!assert (std ([],[],2), NaN(0,1))
+%!assert (std ([],[],3), [])
+%!assert (std (ones (0,1)), NaN)
+%!assert (std (ones (1,0)), NaN)
+%!assert (std (ones (1,0), [], 1), NaN(1,0))
+%!assert (std (ones (1,0), [], 2), NaN)
+%!assert (std (ones (1,0), [], 3), NaN(1,0))
+%!assert (std (ones (0,1)), NaN)
+%!assert (std (ones (0,1), [], 1), NaN)
+%!assert (std (ones (0,1), [], 2), NaN(0,1))
+%!assert (std (ones (0,1), [], 3), NaN(0,1))
+%!assert (std (ones (1,3,0,2)), NaN(1,1,0,2))
+%!assert (std (ones (1,3,0,2), [], 1), NaN(1,3,0,2))
+%!assert (std (ones (1,3,0,2), [], 2), NaN(1,1,0,2))
+%!assert (std (ones (1,3,0,2), [], 3), NaN(1,3,1,2))
+%!assert (std (ones (1,3,0,2), [], 4), NaN(1,3,0))
+
+
 ## Test input validation
 %!error <Invalid call> std ()
 %!error <X must be a numeric> std (['A'; 'B'])
-%!error <OPT must be 0 or 1> std (1, 2)
-%!error <DIM must be an integer> std (1, [], ones (2,2))
-%!error <DIM must be an integer> std (1, [], 1.5)
-%!error <DIM must be .* a valid dimension> std (1, [], 0)
+%!error <W must be 0> std ([1 2], 2)
+%!error <DIM must be a positive integer> std (1, [], ones (2,2))
+%!error <DIM must be a positive integer> std (1, [], 1.5)
+%!error <DIM must be a positive integer> std (1, [], 0)
--- a/scripts/statistics/var.m	Tue Nov 30 11:04:27 2021 -0500
+++ b/scripts/statistics/var.m	Wed Dec 01 12:42:08 2021 -0500
@@ -25,8 +25,9 @@
 
 ## -*- texinfo -*-
 ## @deftypefn  {} {} var (@var{x})
-## @deftypefnx {} {} var (@var{x}, @var{opt})
-## @deftypefnx {} {} var (@var{x}, @var{opt}, @var{dim})
+## @deftypefnx {} {} var (@var{x}, @var{w})
+## @deftypefnx {} {} var (@var{x}, @var{w}, @var{dim})
+## @deftypefnx {} {} var (@var{x}, @var{w}, @qcode{"ALL"})
 ## Compute the variance of the elements of the vector @var{x}.
 ##
 ## The variance is defined as
@@ -50,85 +51,225 @@
 ## where @math{N} is the length of the @var{x} vector.
 ##
 ## @end ifnottex
-## If @var{x} is a matrix, compute the variance for each column and return
-## them in a row vector.
+## If @var{x} is an array, compute the variance for each column and return
+## them in a row vector (or for an n-D array, the result is returned as
+## an array of dimension 1 x n x m x @dots{}).
 ##
-## The argument @var{opt} determines the type of normalization to use.
-## Valid values are
+## The optional argument @var{w} determines the weighting scheme to use.  Valid
+## values are
 ##
 ## @table @asis
-## @item 0:
-##   normalize with @math{N-1}, provides the best unbiased estimator of the
-## variance [default]
+## @item 0 [default]:
+## Normalize with @math{N-1}.  This provides the square root of the best
+## unbiased estimator of the variance.
 ##
 ## @item 1:
-##   normalizes with @math{N}, this provides the second moment around the mean
+## Normalize with @math{N}, this provides the square root of the second moment
+## around the mean
+##
+## @item a vector:
+## Compute the weighted variance with nonnegative scalar weights.  The length of
+## @var{w} must be equal to the size of @var{x} along dimension @var{dim}.
 ## @end table
 ##
-## If @math{N} is equal to 1 the value of @var{opt} is ignored and
+## If @math{N} is equal to 1 the value of @var{W} is ignored and
 ## normalization by @math{N} is used.
 ##
-## If the optional argument @var{dim} is given, operate along this dimension.
+## The optional variable @var{dim} forces @code{var} to operate over the
+## specified dimension.  @var{dim} can either be a scalar dimension or a vector
+## of non-repeating dimensions over which to operate.  Dimensions must be
+## positive integers, and the variance is calculated over the array slice
+## defined by @var{dim}.
+##
+## Specifying dimension @qcode{"ALL"} will force @code{var} to operate on all
+## elements of @var{x}, and is equivalent to @code{var (@var{x}(:))}.
+##
+## When @var{dim} is a vector or @qcode{"ALL"}, @var{w} must be either 0 or 1.
 ## @seealso{cov, std, skewness, kurtosis, moment}
 ## @end deftypefn
 
-function retval = var (x, opt = 0, dim)
+function retval = var (x, w = 0, dim)
 
   if (nargin < 1)
     print_usage ();
+  elseif (nargin < 3)
+    dim = [];
   endif
 
   if (! (isnumeric (x) || islogical (x)))
     error ("var: X must be a numeric vector or matrix");
   endif
 
-  if (isempty (opt))
-    opt = 0;
-  elseif (opt != 0 && opt != 1)
-    error ("var: normalization OPT must be 0 or 1");
-  endif
-
   nd = ndims (x);
   sz = size (x);
-  if (nargin < 3)
+  emptydimflag = false;
+
+  if (isempty (dim))
+    emptydimflag = true;  ## Compatibliity hack for empty x, ndims==2
     ## Find the first non-singleton dimension.
-    (dim = find (sz > 1, 1)) || (dim = 1);
+   (dim = find (sz != 1, 1)) || (dim = 1);
+
   else
     if (! (isscalar (dim) && dim == fix (dim) && dim > 0))
-      error ("var: DIM must be an integer and a valid dimension");
+      if (isvector (dim) &&
+          isnumeric (dim) &&
+          all (dim > 0) &&
+          all (rem (dim, 1) == 0))
+        if (dim != unique (dim, "stable"))
+          error (["var: vector DIM must contain non-repeating positive"...
+                  "integers"]);
+        endif
+        ## Check W
+        if (! isscalar (w))
+          error ("var: W must be either 0 or 1 when DIM is a vector");
+        endif
+
+        ## Reshape X to compute the variance over an array slice
+        if (iscolumn (dim))
+          dim = transpose (dim);
+        endif
+
+        collapsed_dims = dim;
+        dim = dim(end);
+
+        ## Permute X to cluster the dimensions to collapse
+        highest_dim = max ([nd, collapsed_dims]);
+        perm_start = perm_end = [1:highest_dim];
+        perm_start(dim:end) = [];
+        perm_start(ismember (perm_start, collapsed_dims)) = [];
+        perm_end(1:dim) = [];
+        perm_end(ismember (perm_end, collapsed_dims)) = [];
+        perm = [perm_start, collapsed_dims, perm_end];
+
+        x = permute (x, perm);
+
+        ## Collapse the given dimensions
+        newshape = ones (1, highest_dim);
+        newshape(1:nd) = sz;
+        newshape(collapsed_dims(1:(end - 1))) = 1;
+        newshape(dim) = prod (sz(collapsed_dims));
+
+        ## New X with collapsed dimensions
+        x = reshape (x, newshape);
+      elseif (ischar (dim) &&
+              strcmp (tolower (dim), "all"))
+        ## Check W
+        if (! isscalar (w))
+          error ("var: W must be either 0 or 1 when using 'ALL' as dimension");
+        endif
+
+        ## "ALL" equals to collapsing all elements to a single vector
+        x = x(:);
+        dim = 1;
+        sz = size (x);
+      else
+        error ("var: DIM must be a positive integer scalar, vector, or 'all'");
+      endif
     endif
   endif
 
   n = size (x, dim);
-  if (n == 1)
-    if (isa (x, "single"))
-      retval = zeros (sz, "single");
+  if (isempty (w))
+    w = 0;
+  elseif (! isvector (w) ||
+          ! isnumeric (w) ||
+          (isvector (w) && any (w < 0)) ||
+          (isscalar (w) && ((w != 0 && w != 1) && (n != 1))))
+    error ("var: W must be 0, 1, or a vector of positive integers");
+  endif
+
+  if (isempty (x))
+    if (emptydimflag && isequal (sz, [0 0]))
+      retval = NaN;
     else
-      retval = zeros (sz);
+      output_size = sz;
+      output_size(dim) = 1;
+      retval = NaN(output_size);
     endif
-  elseif (numel (x) > 0)
-    retval = sumsq (center (x, dim), dim) / (n - 1 + opt);
   else
-    error ("var: X must not be empty");
+    if (n == 1)
+      if (! isscalar (w))
+        error (["var: the length of W must be equal to the size of X "...
+                  "in the dimension along which variance is calculated"])
+      else
+        if (isa (x, "single"))
+          retval = zeros (sz, "single");
+        else
+          retval = zeros (sz);
+        endif
+      endif
+    else
+      if (isscalar (w))
+        retval = sumsq (center (x, dim), dim) / (n - 1 + w);
+      else
+        ## Weighted variance
+        if (length (w) != n)
+          error (["var: the length of W must be equal to the size of X "...
+                  "in the dimension along which variance is calculated"]);
+        else
+          if ((dim == 1 && rows (w) == 1) ||
+              (dim == 2 && columns (w) == 1))
+            w = transpose (w);
+          elseif (dim > 2)
+            newdims = [(ones (1, (dim - 1))), (length (w))];
+            w = reshape (w, newdims);
+          endif
+          den = sum (w);
+          mu = sum (w .* x, dim) ./ sum (w);
+          retval = sum (w .* ((x .- mu) .^ 2), dim) / den;
+        endif
+      endif
+    endif
   endif
 
 endfunction
 
-
 %!assert (var (13), 0)
 %!assert (var (single (13)), single (0))
 %!assert (var ([1,2,3]), 1)
 %!assert (var ([1,2,3], 1), 2/3, eps)
 %!assert (var ([1,2,3], [], 1), [0,0,0])
 %!assert (var ([1,2,3], [], 3), [0,0,0])
+%!assert (var (5, 99), 0)
+%!assert (var (5, 99, 1), 0)
+%!assert (var (5, 99, 2), 0)
+%!assert (var ([1:7], [1:7]), 3)
+%!assert (var ([eye(3)], [1:3]), [5/36, 2/9, 1/4], eps)
+%!assert (var (ones (2,2,2), [1:2], 3), [(zeros (2,2))])
+%!assert (var ([1 2; 3 4], 0, 'all'), var ([1:4]))
+%!assert (var (reshape ([1:8], 2, 2, 2), 0, [1 3]), [17/3 17/3], eps)
+
+##Test empty inputs
+%!assert (var ([]), NaN)
+%!assert (var ([],[],1), NaN(1,0))
+%!assert (var ([],[],2), NaN(0,1))
+%!assert (var ([],[],3), [])
+%!assert (var (ones (0,1)), NaN)
+%!assert (var (ones (1,0)), NaN)
+%!assert (var (ones (1,0), [], 1), NaN(1,0))
+%!assert (var (ones (1,0), [], 2), NaN)
+%!assert (var (ones (1,0), [], 3), NaN(1,0))
+%!assert (var (ones (0,1)), NaN)
+%!assert (var (ones (0,1), [], 1), NaN)
+%!assert (var (ones (0,1), [], 2), NaN(0,1))
+%!assert (var (ones (0,1), [], 3), NaN(0,1))
+%!assert (var (ones (1,3,0,2)), NaN(1,1,0,2))
+%!assert (var (ones (1,3,0,2), [], 1), NaN(1,3,0,2))
+%!assert (var (ones (1,3,0,2), [], 2), NaN(1,1,0,2))
+%!assert (var (ones (1,3,0,2), [], 3), NaN(1,3,1,2))
+%!assert (var (ones (1,3,0,2), [], 4), NaN(1,3,0))
 
 ## Test input validation
 %!error <Invalid call> var ()
 %!error <X must be a numeric> var (['A'; 'B'])
-%!error <OPT must be 0 or 1> var (1, -1)
-%!error <FLAG must be 0 or 1> skewness (1, 2)
-%!error <FLAG must be 0 or 1> skewness (1, [1 0])
-%!error <DIM must be an integer> var (1, [], ones (2,2))
-%!error <DIM must be an integer> var (1, [], 1.5)
-%!error <DIM must be .* a valid dimension> var (1, [], 0)
-%!error <X must not be empty> var ([], 1)
+%!error <W must be 0> var ([1 2 3], 2)
+%!error <W must be .* a vector of positive integers> var ([1 2], [-1 0])
+%!error <W must be .* a vector of positive integers> var ([1 2], eye (2))
+%!error <W must be either 0 or 1> var (ones (2, 2), [1 2], [1 2])
+%!error <W must be either 0 or 1> var ([1 2], [1 2], 'all')
+%!error <the length of W must be> var ([1 2], [1 2 3])
+%!error <the length of W must be> var (1, [1 2])
+%!error <the length of W must be> var ([1 2], [1 2], 1)
+%!error <DIM must be a positive integer> var (1, [], ones (2,2))
+%!error <DIM must be a positive integer> var (1, [], 1.5)
+%!error <DIM must be a positive integer> var (1, [], 0)