octave: scripts/statistics/mean.m comparison

comparison scripts/statistics/mean.m @ 31421:21962c678648

mean.m: more Matlab compatibility (bugs #58116, #50571). Background: This improved function was released with the Statistics Package. * scripts/statistics/mean.m: now accepts vector dimensions and options to handle `NaN` values. The option `"a"` (arithmetic mean), `"g"` (geometric mean), and `"h"` (harmonic mean) are no longer accepted, only the arithmetic mean is computed. For the geometric and harmonic mean, please use respective functions `geomean` and `harmmean` from the Octave Statistics package. * etc/NEWS.8.md: announce changes. Co-authored-by: Kai T. Ohlhus <k.ohlhus@gmail.com>

author	Andreas Bertsatos <abertsatos@biol.uoa.gr>
date	Sun, 13 Nov 2022 01:44:54 +0900
parents	5d3faba0342e
children	9ee8943cad01

comparison

equal deleted inserted replaced

-:e98fb9b4be86
+:21962c678648
 ##
 ########################################################################
 ## -*- texinfo -*-
 ## @deftypefn  {} {@var{y} =} mean (@var{x})
+## @deftypefnx {} {@var{y} =} mean (@var{x}, "all")
 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{dim})
-## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{opt})
+## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{vecdim})
-## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{dim}, @var{opt})
 ## @deftypefnx {} {@var{y} =} mean (@dots{}, @var{outtype})
-## Compute the mean of the elements of the vector @var{x}.
+## @deftypefnx {} {@var{y} =} mean (@dots{}, @var{nanflag})
-##
+## Compute the mean of the elements of @var{x}.
-## The mean is defined as
+##
-##
+## @itemize
+## @item
+## If @var{x} is a vector, then @code{mean(@var{x})} returns the
+## mean of the elements in @var{x} defined as
 ## @tex
 ## $$ {\rm mean}(x) = \bar{x} = {1\over N} \sum_{i=1}^N x_i $$
 ## where $N$ is the number of elements of @var{x}.
 ##
 ## @end tex
 ##
 ## @noindent
 ## where @math{N} is the length of the @var{x} vector.
 ##
 ## @end ifnottex
-## If @var{x} is a matrix, compute the mean for each column and return them
+##
-## in a row vector.
+## @item
-##
+## If @var{x} is a matrix, then @code{mean(@var{x})} returns a row vector
-## If the optional argument @var{dim} is given, operate along this dimension.
+## with the mean of each columns in @var{x}.
 ##
-## The optional argument @var{opt} selects the type of mean to compute.
+## @item
-## The following options are recognized:
+## If @var{x} is a multidimensional array, then @code{mean(@var{x})}
-##
+## operates along the first nonsingleton dimension of @var{x}.
-## @table @asis
+## @end itemize
-## @item @qcode{"a"}
+##
-## Compute the (ordinary) arithmetic mean.  [default]
+## @code{mean(@var{x}, "all")} returns the mean of all the elements in @var{x}.
 ##
-## @item @qcode{"g"}
+## @code{mean(@var{x}, @var{dim})} returns the mean along the
-## Compute the geometric mean.
+## operating dimension @var{dim} of @var{x}.
 ##
-## @item @qcode{"h"}
+## @code{mean(@var{x}, @var{vecdim})} returns the mean over the
-## Compute the harmonic mean.
+## dimensions specified in the vector @var{vecdim}.  For example, if @var{x}
-## @end table
+## is a 2-by-3-by-4 array, then @code{mean(@var{x}, [1 2])} returns a 1-by-4
-##
+## array.  Each element of the output array is the mean of the elements on
-## The optional argument @var{outtype} selects the data type of the
+## the corresponding page of @var{x}.  NOTE! @var{vecdim} MUST index at least
-## output value.  The following options are recognized:
+## N-2 dimensions of @var{x}, where @code{N = length (size (@var{x}))} and
-##
+## N < 8.  If @var{vecdim} indexes all dimensions of @var{x}, then it is
-## @table @asis
+## equivalent to @code{mean(@var{x}, "all")}.
-## @item @qcode{"default"}
+##
-## Output will be of class double unless @var{x} is of class single,
+## @code{mean(@dots{}, @var{outtype})} returns the mean with a specified data
-## in which case the output will also be single.
+## type, using any of the input arguments in the previous syntaxes.
-##
+## @var{outtype} can be "default", "double", or "native".
-## @item @qcode{"double"}
+##
-## Output will be of class double.
+## @code{mean(@dots{}, @var{nanflag})} specifies whether to exclude NaN values
-##
+## from the calculation, using any of the input argument combinations in
-## @item @qcode{"native"}
+## previous syntaxes.  By default, NaN values are included in the calculation
-## Output will be the same class as @var{x} unless @var{x} is of class
+## (@var{nanflag} has the value "includenan").  To exclude NaN values, set the
-## logical in which case it returns of class double.
+## value of @var{nanflag} to "omitnan".
 ##
-## @end table
-##
-## Both @var{dim} and @var{opt} are optional.  If both are supplied, either
-## may appear first.
 ## @seealso{median, mode}
 ## @end deftypefn
 function y = mean (x, varargin)
-if (nargin < 1 || nargin > 4)
+if (nargin < 1 || nargin > 4 || any (cellfun (@isnumeric, varargin(2:end))))
 print_usage ();
 endif
-if (! (isnumeric (x) || islogical (x)))
+## Check all char arguments.
-error ("mean: X must be a numeric vector or matrix");
+all_flag = false;
-endif
+omitnan = false;
-nd = ndims (x);
+outtype = "default";
-sz = size (x);
+for i = 1:length (varargin)
-## We support too many options...
+if (ischar (varargin{i}))
+switch (varargin{i})
-## If OUTTYPE is set, it must be the last option.  If DIM and
+case "all"
-## MEAN_TYPE exist, they must be the first two options
+all_flag = true;
+case "omitnan"
-out_type = "default";
+omitnan = true;
-if (numel (varargin))
+case "includenan"
-maybe_out_type = tolower (varargin{end});
+omitnan = false;
-if (any (strcmpi (maybe_out_type, {"default", "double", "native"})))
+case {"default", "double", "native"}
-out_type = maybe_out_type;
+outtype = varargin{i};
-varargin(end) = [];
+otherwise
+print_usage ();
+endswitch
 endif
-endif
+endfor
+varargin(cellfun (@ischar, varargin)) = [];
-scalars = cellfun (@isscalar, varargin);
-chars = cellfun (@ischar, varargin);
+if (((length (varargin) == 1) && ! (isnumeric (varargin{1}))) ...
-numerics = cellfun (@isnumeric, varargin);
+|| (length (varargin) > 1))
-dim_mask = numerics & scalars;
-mean_type_mask = chars & scalars;
-if (! all (dim_mask | mean_type_mask))
 print_usage ();
 endif
-switch (nnz (dim_mask))
+if (! (isnumeric (x) || islogical (x)))
-case 0 # Find the first non-singleton dimension
+error ("mean: X must be either a numeric or boolean vector or matrix");
-(dim = find (sz > 1, 1)) || (dim = 1);
+endif
-case 1
-dim = varargin{dim_mask};
+if (length (varargin) == 0)
-if (dim != fix (dim) || dim < 1)
-error ("mean: DIM must be an integer and a valid dimension");
+## Single numeric input argument, no dimensions given.
-endif
+if (all_flag)
-otherwise
+n = length (x(:));
-print_usage ();
+if (omitnan)
-endswitch
+n = length (x(! isnan (x)));
+x(isnan (x)) = 0;
-switch (nnz (mean_type_mask))
+endif
-case 0
+y = sum (x(:), 1) ./ n;
-mean_type = "a";
+else
-case 1
+sz = size (x);
-mean_type = varargin{mean_type_mask};
+dim = find (sz > 1, 1);
-otherwise
+if length (dim) == 0
-print_usage ();
+dim = 1;
-endswitch
+endif
+n = size (x, dim);
-## The actual mean computation
+if (omitnan)
-n = size (x, dim);
+n = sum (! isnan (x), dim);
-switch (mean_type)
+x(isnan (x)) = 0;
-case "a"
+endif
-y = sum (x, dim) / n;
+y = sum (x, dim) ./ n;
-case "g"
+endif
-if (! any (x(:) < 0))
-y = exp (sum (log (x), dim) ./ n);
+else
-else
-error ("mean: X must not contain any negative values");
+## Two numeric input arguments, dimensions given.  Note scalar is vector!
-endif
+vecdim = varargin{1};
-case "h"
+if (! (isvector (vecdim) && all (vecdim)) || any (rem (vecdim, 1)))
-y = n ./ sum (1 ./ x, dim);
+error ("mean: Dimension must be a positive integer scalar or vector");
-otherwise
+endif
-error ("mean: mean type '%s' not recognized", mean_type);
-endswitch
+if (isscalar (vecdim))
+n = size (x, vecdim);
+if (omitnan)
+n = sum (! isnan (x), vecdim);
+x(isnan (x)) = 0;
+endif
+y = sum (x, vecdim) ./ n;
+else
+sz = size (x);
+ndims = length (sz);
+misdim = [1:ndims];
+## keep remaining dimensions
+for i = 1:length (vecdim)
+misdim(misdim == vecdim(i)) = [];
+endfor
+switch (length (misdim))
+## if all dimensions are given, compute x(:)
+case 0
+n = length (x(:));
+if (omitnan)
+n = length (x(! isnan (x)));
+x(isnan (x)) = 0;
+endif
+y = sum (x(:), 1) ./ n;
+## for 1 dimension left, return column vector
+case 1
+x = permute (x, [misdim, vecdim]);
+for i = 1:size (x, 1)
+x_vec = x(i,:,:,:,:,:,:)(:);
+if (omitnan)
+x_vec = x_vec(! isnan (x_vec));
+endif
+y(i) = sum (x_vec, 1) ./ length (x_vec);
+endfor
+## for 2 dimensions left, return matrix
+case 2
+x = permute (x, [misdim, vecdim]);
+for i = 1:size (x, 1)
+for j = 1:size (x, 2)
+x_vec = x(i,j,:,:,:,:,:)(:);
+if (omitnan)
+x_vec = x_vec(! isnan (x_vec));
+endif
+y(i,j) = sum (x_vec, 1) ./ length (x_vec);
+endfor
+endfor
+## for more that 2 dimensions left, print usage
+otherwise
+error ("vecdim must index at least N-2 dimensions of X");
+endswitch
+endif
+endif
 ## Convert output as requested
-switch (out_type)
+switch (outtype)
 case "default"
 ## do nothing, the operators already do the right thing
 case "double"
 y = double (y);
 case "native"
-if (islogical (x))
+if (! islogical (x))
-## ignore it, return double anyway
-else
 y = cast (y, class (x));
 endif
 otherwise
-## this should have been filtered out during input check, but...
+error ("mean: OUTTYPE '%s' not recognized", outtype);
-error ("mean: OUTTYPE '%s' not recognized", out_type);
 endswitch
 endfunction
 %! z = [y, y+10];
 %! assert (mean (x), 0);
 %! assert (mean (y), 0);
 %! assert (mean (z), [0, 10]);
-## Test small numbers
-%!assert (mean (repmat (0.1,1,1000), "g"), 0.1, 20*eps)
 %!assert (mean (magic (3), 1), [5, 5, 5])
 %!assert (mean (magic (3), 2), [5; 5; 5])
-%!assert (mean ([2 8], "g"), 4)
-%!assert (mean ([4 4 2], "h"), 3)
 %!assert (mean (logical ([1 0 1 1])), 0.75)
 %!assert (mean (single ([1 0 1 1])), single (0.75))
 %!assert (mean ([1 2], 3), [1 2])
 ## Test input validation
 %!error <Invalid call to mean.  Correct usage is> mean ()
+%!error <Invalid call to mean.  Correct usage is> mean (1, 2, 3)
 %!error <Invalid call to mean.  Correct usage is> mean (1, 2, 3, 4)
-%!error <X must be a numeric> mean ({1:5})
+%!error <Invalid call to mean.  Correct usage is> mean (1, "all", 3)
-%!error <Invalid call to mean.  Correct usage is> mean (1, 2, 3)
+%!error <Invalid call to mean.  Correct usage is> mean (1, "b")
-%!error <Invalid call to mean.  Correct usage is> mean (1, ones (2,2))
-%!error <DIM must be an integer> mean (1, 1.5)
-%!error <DIM must be .* a valid dimension> mean (1, 0)
-%!error <X must not contain any negative values> mean ([1 -1], "g")
-%!error <mean type 'b' not recognized> mean (1, "b")
 %!error <Invalid call to mean.  Correct usage is> mean (1, 1, "foo")
+%!error <X must be either a numeric or boolean> mean ({1:5})
+%!error <X must be either a numeric or boolean> mean ("char")
+%!error <Dimension must be a positive integer> mean (1, ones (2,2))
+%!error <Dimension must be a positive integer> mean (1, 1.5)
+%!error <Dimension must be a positive integer> mean (1, 0)
+%!error <vecdim must index at least N-2 dimensions of X> ...
+%!  mean (repmat ([1:20;6:25], [5 2 6 3 5]), [1 2])
 ## Test outtype option
 %!test
 %! in = [1 2 3];
 %! out = 2;
 %! in = logical ([1 0 1]);
 %! out = 2/3;
 %! assert (mean (in, "default"), mean (in));
 %! assert (mean (in, "default"), out);
 %! assert (mean (in, "native"), out);  # logical ignores native option
+## Test single input and optional arguments "all", DIM, "omitnan")
+%!test
+%! x = [-10:10];
+%! y = [x;x+5;x-5];
+%! assert (mean (x), 0);
+%! assert (mean (y, 2), [0, 5, -5]');
+%! assert (mean (y, "all"), 0);
+%! y(2,4) = NaN;
+%! assert (mean (y', "omitnan"), [0 5.35 -5]);
+%! z = y + 20;
+%! assert (mean (z, "all"), NaN);
+%! m = [20 NaN 15];
+%! assert (mean (z'), m);
+%! assert (mean (z', "includenan"), m);
+%! m = [20 25.35 15];
+%! assert (mean (z', "omitnan"), m);
+%! assert (mean (z, 2, "omitnan"), m');
+%! assert (mean (z, 2, "native", "omitnan"), m');
+%! assert (mean (z, 2, "omitnan", "native"), m');
+## Test boolean input
+%!test
+%! assert (mean (true, "all"), 1);
+%! assert (mean (false), 0);
+%! assert (mean ([true false true]), 2/3, 4e-14);
+%! assert (mean ([true false true], 1), [1 0 1]);
+%! assert (mean ([true false NaN], 1), [1 0 NaN]);
+%! assert (mean ([true false NaN], 2), NaN);
+%! assert (mean ([true false NaN], 2, "omitnan"), 0.5);
+%! assert (mean ([true false NaN], 2, "omitnan", "native"), 0.5);
+## Test dimension indexing with vecdim in n-dimensional arrays
+%!test
+%! x = repmat ([1:20;6:25], [5 2 6 3]);
+%! assert (size (mean (x, [3 2])), [10 3]);
+%! assert (size (mean (x, [1 2])), [6 3]);
+%! assert (size (mean (x, [1 2 4])), [1 6]);
+%! assert (size (mean (x, [1 4 3])), [1 40]);
+%! assert (size (mean (x, [1 2 3 4])), [1 1]);
+## Test results with vecdim in n-dimensional arrays and "omitnan"
+%!test
+%! x = repmat ([1:20;6:25], [5 2 6 3]);
+%! m = repmat ([10.5;15.5], [5,3]);
+%! assert (mean (x, [3 2]), m, 4e-14);
+%! x(2,5,6,3) = NaN;
+%! m(2,3) = NaN;
+%! assert (mean (x, [3 2]), m, 4e-14);
+%! m(2,3) = 15.52301255230125;
+%! assert (mean (x, [3 2], "omitnan"), m, 4e-14);

Mercurial > octave

comparison scripts/statistics/mean.m @ 31421:21962c678648