Mercurial > octave
changeset 30410:bc0de453fb6a stable
extend var and std with dim options (bug #58116) and weights (patch #10103)
* /scripts/statistics/var.m: adds scalar and vector weights, vector and "all"
dim options, compatible empty input handling. Also updates BISTs and dosctring.
* /scripts/statistics/std.m: simplifies std as simple sqrt(var(x)) wrapper,
inheriting the extensions to var. Updates docstring and BISTs accordingly.
* /NEWS: notes improved matlab compatibility
author | Stefano Guidoni <ilguido@users.sf.net> |
---|---|
date | Wed, 01 Dec 2021 12:42:08 -0500 |
parents | 597275db9c7f |
children | 0fee9e910d84 |
files | NEWS scripts/statistics/std.m scripts/statistics/var.m |
diffstat | 3 files changed, 240 insertions(+), 91 deletions(-) [+] |
line wrap: on
line diff
--- a/NEWS Tue Nov 30 11:04:27 2021 -0500 +++ b/NEWS Wed Dec 01 12:42:08 2021 -0500 @@ -263,6 +263,10 @@ - The function `repelem` now produces a row vector output when the input is a scalar. +- The functions `var` and `std` now accept a weight vector as input and +compute the weigthed variance. Dimension input now allows a vector and +the keyword "all". + ### Alphabetical list of new functions added in Octave 7 * `cospi`
--- a/scripts/statistics/std.m Tue Nov 30 11:04:27 2021 -0500 +++ b/scripts/statistics/std.m Wed Dec 01 12:42:08 2021 -0500 @@ -25,8 +25,9 @@ ## -*- texinfo -*- ## @deftypefn {} {} std (@var{x}) -## @deftypefnx {} {} std (@var{x}, @var{opt}) -## @deftypefnx {} {} std (@var{x}, @var{opt}, @var{dim}) +## @deftypefnx {} {} std (@var{x}, @var{w}) +## @deftypefnx {} {} std (@var{x}, @var{w}, @var{dim}) +## @deftypefnx {} {} std (@var{x}, @var{w}, @qcode{"ALL"}) ## Compute the standard deviation of the elements of the vector @var{x}. ## ## The standard deviation is defined as @@ -48,63 +49,47 @@ ## where @math{N} is the number of elements of the @var{x} vector. ## @end ifnottex ## -## If @var{x} is a matrix, compute the standard deviation for each column and -## return them in a row vector. +## If @var{x} is an array, compute the standard deviation for each column and +## return them in a row vector (or for an n-D array, the result is returned as +## an array of dimension 1 x n x m x @dots{}). ## -## The argument @var{opt} determines the type of normalization to use. -## Valid values are +## The optional argument @var{w} determines the weighting scheme to use. Valid +## values are: ## ## @table @asis -## @item 0: -## normalize with @math{N-1}, provides the square root of the best unbiased -## estimator of the variance [default] +## @item 0 [default]: +## Normalize with @math{N-1}. This provides the square root of the best +## unbiased estimator of the variance. ## ## @item 1: -## normalize with @math{N}, this provides the square root of the second -## moment around the mean +## Normalize with @math{N}. This provides the square root of the second moment +## around the mean. +## +## @item a vector: +## Compute the weighted standard deviation with nonnegative scalar weights. The +## length of @var{w} must be equal to the size of @var{x} along dimension +## @var{dim}. ## @end table ## -## If the optional argument @var{dim} is given, operate along this dimension. +## If @math{N} is equal to 1 the value of @var{W} is ignored and +## normalization by @math{N} is used. +## +## The optional variable @var{dim} forces @code{std} to operate over the +## specified dimension. @var{dim} can either be a scalar dimension or a vector +## of non-repeating dimensions over which to operate. Dimensions must be +## positive integers, and the standard deviation is calculated over the array +## slice defined by @var{dim}. +## +## Specifying dimension @qcode{"ALL"} will force @code{std} to operate on all +## elements of @var{x}, and is equivalent to @code{std (@var{x}(:))}. +## +## When @var{dim} is a vector or @qcode{"ALL"}, @var{w} must be either 0 or 1. ## @seealso{var, bounds, mad, range, iqr, mean, median} ## @end deftypefn -function retval = std (x, opt = 0, dim) - - if (nargin < 1) - print_usage (); - endif - - if (! (isnumeric (x) || islogical (x))) - error ("std: X must be a numeric vector or matrix"); - endif - - if (isempty (opt)) - opt = 0; - elseif (! isscalar (opt) || (opt != 0 && opt != 1)) - error ("std: normalization OPT must be 0 or 1"); - endif +function retval = std (varargin) - nd = ndims (x); - sz = size (x); - if (nargin < 3) - ## Find the first non-singleton dimension. - (dim = find (sz > 1, 1)) || (dim = 1); - else - if (! (isscalar (dim) && dim == fix (dim) && dim > 0)) - error ("std: DIM must be an integer and a valid dimension"); - endif - endif - - n = size (x, dim); - if (n == 1 || isempty (x)) - if (isa (x, "single")) - retval = zeros (sz, "single"); - else - retval = zeros (sz); - endif - else - retval = sqrt (sumsq (center (x, dim), dim) / (n - 1 + opt)); - endif + retval = sqrt (var (varargin{:})); endfunction @@ -121,14 +106,33 @@ %!assert (std ([1 2], 1), 0.5, 5*eps) %!assert (std (1), 0) %!assert (std (single (1)), single (0)) -%!assert (std ([]), []) -%!assert (std (ones (1,3,0,2)), ones (1,3,0,2)) %!assert (std ([1 2 3], [], 3), [0 0 0]) +##Test empty inputs +%!assert (std ([]), NaN) +%!assert (std ([],[],1), NaN(1,0)) +%!assert (std ([],[],2), NaN(0,1)) +%!assert (std ([],[],3), []) +%!assert (std (ones (0,1)), NaN) +%!assert (std (ones (1,0)), NaN) +%!assert (std (ones (1,0), [], 1), NaN(1,0)) +%!assert (std (ones (1,0), [], 2), NaN) +%!assert (std (ones (1,0), [], 3), NaN(1,0)) +%!assert (std (ones (0,1)), NaN) +%!assert (std (ones (0,1), [], 1), NaN) +%!assert (std (ones (0,1), [], 2), NaN(0,1)) +%!assert (std (ones (0,1), [], 3), NaN(0,1)) +%!assert (std (ones (1,3,0,2)), NaN(1,1,0,2)) +%!assert (std (ones (1,3,0,2), [], 1), NaN(1,3,0,2)) +%!assert (std (ones (1,3,0,2), [], 2), NaN(1,1,0,2)) +%!assert (std (ones (1,3,0,2), [], 3), NaN(1,3,1,2)) +%!assert (std (ones (1,3,0,2), [], 4), NaN(1,3,0)) + + ## Test input validation %!error <Invalid call> std () %!error <X must be a numeric> std (['A'; 'B']) -%!error <OPT must be 0 or 1> std (1, 2) -%!error <DIM must be an integer> std (1, [], ones (2,2)) -%!error <DIM must be an integer> std (1, [], 1.5) -%!error <DIM must be .* a valid dimension> std (1, [], 0) +%!error <W must be 0> std ([1 2], 2) +%!error <DIM must be a positive integer> std (1, [], ones (2,2)) +%!error <DIM must be a positive integer> std (1, [], 1.5) +%!error <DIM must be a positive integer> std (1, [], 0)
--- a/scripts/statistics/var.m Tue Nov 30 11:04:27 2021 -0500 +++ b/scripts/statistics/var.m Wed Dec 01 12:42:08 2021 -0500 @@ -25,8 +25,9 @@ ## -*- texinfo -*- ## @deftypefn {} {} var (@var{x}) -## @deftypefnx {} {} var (@var{x}, @var{opt}) -## @deftypefnx {} {} var (@var{x}, @var{opt}, @var{dim}) +## @deftypefnx {} {} var (@var{x}, @var{w}) +## @deftypefnx {} {} var (@var{x}, @var{w}, @var{dim}) +## @deftypefnx {} {} var (@var{x}, @var{w}, @qcode{"ALL"}) ## Compute the variance of the elements of the vector @var{x}. ## ## The variance is defined as @@ -50,85 +51,225 @@ ## where @math{N} is the length of the @var{x} vector. ## ## @end ifnottex -## If @var{x} is a matrix, compute the variance for each column and return -## them in a row vector. +## If @var{x} is an array, compute the variance for each column and return +## them in a row vector (or for an n-D array, the result is returned as +## an array of dimension 1 x n x m x @dots{}). ## -## The argument @var{opt} determines the type of normalization to use. -## Valid values are +## The optional argument @var{w} determines the weighting scheme to use. Valid +## values are ## ## @table @asis -## @item 0: -## normalize with @math{N-1}, provides the best unbiased estimator of the -## variance [default] +## @item 0 [default]: +## Normalize with @math{N-1}. This provides the square root of the best +## unbiased estimator of the variance. ## ## @item 1: -## normalizes with @math{N}, this provides the second moment around the mean +## Normalize with @math{N}, this provides the square root of the second moment +## around the mean +## +## @item a vector: +## Compute the weighted variance with nonnegative scalar weights. The length of +## @var{w} must be equal to the size of @var{x} along dimension @var{dim}. ## @end table ## -## If @math{N} is equal to 1 the value of @var{opt} is ignored and +## If @math{N} is equal to 1 the value of @var{W} is ignored and ## normalization by @math{N} is used. ## -## If the optional argument @var{dim} is given, operate along this dimension. +## The optional variable @var{dim} forces @code{var} to operate over the +## specified dimension. @var{dim} can either be a scalar dimension or a vector +## of non-repeating dimensions over which to operate. Dimensions must be +## positive integers, and the variance is calculated over the array slice +## defined by @var{dim}. +## +## Specifying dimension @qcode{"ALL"} will force @code{var} to operate on all +## elements of @var{x}, and is equivalent to @code{var (@var{x}(:))}. +## +## When @var{dim} is a vector or @qcode{"ALL"}, @var{w} must be either 0 or 1. ## @seealso{cov, std, skewness, kurtosis, moment} ## @end deftypefn -function retval = var (x, opt = 0, dim) +function retval = var (x, w = 0, dim) if (nargin < 1) print_usage (); + elseif (nargin < 3) + dim = []; endif if (! (isnumeric (x) || islogical (x))) error ("var: X must be a numeric vector or matrix"); endif - if (isempty (opt)) - opt = 0; - elseif (opt != 0 && opt != 1) - error ("var: normalization OPT must be 0 or 1"); - endif - nd = ndims (x); sz = size (x); - if (nargin < 3) + emptydimflag = false; + + if (isempty (dim)) + emptydimflag = true; ## Compatibliity hack for empty x, ndims==2 ## Find the first non-singleton dimension. - (dim = find (sz > 1, 1)) || (dim = 1); + (dim = find (sz != 1, 1)) || (dim = 1); + else if (! (isscalar (dim) && dim == fix (dim) && dim > 0)) - error ("var: DIM must be an integer and a valid dimension"); + if (isvector (dim) && + isnumeric (dim) && + all (dim > 0) && + all (rem (dim, 1) == 0)) + if (dim != unique (dim, "stable")) + error (["var: vector DIM must contain non-repeating positive"... + "integers"]); + endif + ## Check W + if (! isscalar (w)) + error ("var: W must be either 0 or 1 when DIM is a vector"); + endif + + ## Reshape X to compute the variance over an array slice + if (iscolumn (dim)) + dim = transpose (dim); + endif + + collapsed_dims = dim; + dim = dim(end); + + ## Permute X to cluster the dimensions to collapse + highest_dim = max ([nd, collapsed_dims]); + perm_start = perm_end = [1:highest_dim]; + perm_start(dim:end) = []; + perm_start(ismember (perm_start, collapsed_dims)) = []; + perm_end(1:dim) = []; + perm_end(ismember (perm_end, collapsed_dims)) = []; + perm = [perm_start, collapsed_dims, perm_end]; + + x = permute (x, perm); + + ## Collapse the given dimensions + newshape = ones (1, highest_dim); + newshape(1:nd) = sz; + newshape(collapsed_dims(1:(end - 1))) = 1; + newshape(dim) = prod (sz(collapsed_dims)); + + ## New X with collapsed dimensions + x = reshape (x, newshape); + elseif (ischar (dim) && + strcmp (tolower (dim), "all")) + ## Check W + if (! isscalar (w)) + error ("var: W must be either 0 or 1 when using 'ALL' as dimension"); + endif + + ## "ALL" equals to collapsing all elements to a single vector + x = x(:); + dim = 1; + sz = size (x); + else + error ("var: DIM must be a positive integer scalar, vector, or 'all'"); + endif endif endif n = size (x, dim); - if (n == 1) - if (isa (x, "single")) - retval = zeros (sz, "single"); + if (isempty (w)) + w = 0; + elseif (! isvector (w) || + ! isnumeric (w) || + (isvector (w) && any (w < 0)) || + (isscalar (w) && ((w != 0 && w != 1) && (n != 1)))) + error ("var: W must be 0, 1, or a vector of positive integers"); + endif + + if (isempty (x)) + if (emptydimflag && isequal (sz, [0 0])) + retval = NaN; else - retval = zeros (sz); + output_size = sz; + output_size(dim) = 1; + retval = NaN(output_size); endif - elseif (numel (x) > 0) - retval = sumsq (center (x, dim), dim) / (n - 1 + opt); else - error ("var: X must not be empty"); + if (n == 1) + if (! isscalar (w)) + error (["var: the length of W must be equal to the size of X "... + "in the dimension along which variance is calculated"]) + else + if (isa (x, "single")) + retval = zeros (sz, "single"); + else + retval = zeros (sz); + endif + endif + else + if (isscalar (w)) + retval = sumsq (center (x, dim), dim) / (n - 1 + w); + else + ## Weighted variance + if (length (w) != n) + error (["var: the length of W must be equal to the size of X "... + "in the dimension along which variance is calculated"]); + else + if ((dim == 1 && rows (w) == 1) || + (dim == 2 && columns (w) == 1)) + w = transpose (w); + elseif (dim > 2) + newdims = [(ones (1, (dim - 1))), (length (w))]; + w = reshape (w, newdims); + endif + den = sum (w); + mu = sum (w .* x, dim) ./ sum (w); + retval = sum (w .* ((x .- mu) .^ 2), dim) / den; + endif + endif + endif endif endfunction - %!assert (var (13), 0) %!assert (var (single (13)), single (0)) %!assert (var ([1,2,3]), 1) %!assert (var ([1,2,3], 1), 2/3, eps) %!assert (var ([1,2,3], [], 1), [0,0,0]) %!assert (var ([1,2,3], [], 3), [0,0,0]) +%!assert (var (5, 99), 0) +%!assert (var (5, 99, 1), 0) +%!assert (var (5, 99, 2), 0) +%!assert (var ([1:7], [1:7]), 3) +%!assert (var ([eye(3)], [1:3]), [5/36, 2/9, 1/4], eps) +%!assert (var (ones (2,2,2), [1:2], 3), [(zeros (2,2))]) +%!assert (var ([1 2; 3 4], 0, 'all'), var ([1:4])) +%!assert (var (reshape ([1:8], 2, 2, 2), 0, [1 3]), [17/3 17/3], eps) + +##Test empty inputs +%!assert (var ([]), NaN) +%!assert (var ([],[],1), NaN(1,0)) +%!assert (var ([],[],2), NaN(0,1)) +%!assert (var ([],[],3), []) +%!assert (var (ones (0,1)), NaN) +%!assert (var (ones (1,0)), NaN) +%!assert (var (ones (1,0), [], 1), NaN(1,0)) +%!assert (var (ones (1,0), [], 2), NaN) +%!assert (var (ones (1,0), [], 3), NaN(1,0)) +%!assert (var (ones (0,1)), NaN) +%!assert (var (ones (0,1), [], 1), NaN) +%!assert (var (ones (0,1), [], 2), NaN(0,1)) +%!assert (var (ones (0,1), [], 3), NaN(0,1)) +%!assert (var (ones (1,3,0,2)), NaN(1,1,0,2)) +%!assert (var (ones (1,3,0,2), [], 1), NaN(1,3,0,2)) +%!assert (var (ones (1,3,0,2), [], 2), NaN(1,1,0,2)) +%!assert (var (ones (1,3,0,2), [], 3), NaN(1,3,1,2)) +%!assert (var (ones (1,3,0,2), [], 4), NaN(1,3,0)) ## Test input validation %!error <Invalid call> var () %!error <X must be a numeric> var (['A'; 'B']) -%!error <OPT must be 0 or 1> var (1, -1) -%!error <FLAG must be 0 or 1> skewness (1, 2) -%!error <FLAG must be 0 or 1> skewness (1, [1 0]) -%!error <DIM must be an integer> var (1, [], ones (2,2)) -%!error <DIM must be an integer> var (1, [], 1.5) -%!error <DIM must be .* a valid dimension> var (1, [], 0) -%!error <X must not be empty> var ([], 1) +%!error <W must be 0> var ([1 2 3], 2) +%!error <W must be .* a vector of positive integers> var ([1 2], [-1 0]) +%!error <W must be .* a vector of positive integers> var ([1 2], eye (2)) +%!error <W must be either 0 or 1> var (ones (2, 2), [1 2], [1 2]) +%!error <W must be either 0 or 1> var ([1 2], [1 2], 'all') +%!error <the length of W must be> var ([1 2], [1 2 3]) +%!error <the length of W must be> var (1, [1 2]) +%!error <the length of W must be> var ([1 2], [1 2], 1) +%!error <DIM must be a positive integer> var (1, [], ones (2,2)) +%!error <DIM must be a positive integer> var (1, [], 1.5) +%!error <DIM must be a positive integer> var (1, [], 0)