comparison scripts/statistics/mean.m @ 31421:21962c678648

mean.m: more Matlab compatibility (bugs #58116, #50571). Background: This improved function was released with the Statistics Package. * scripts/statistics/mean.m: now accepts vector dimensions and options to handle `NaN` values. The option `"a"` (arithmetic mean), `"g"` (geometric mean), and `"h"` (harmonic mean) are no longer accepted, only the arithmetic mean is computed. For the geometric and harmonic mean, please use respective functions `geomean` and `harmmean` from the Octave Statistics package. * etc/NEWS.8.md: announce changes. Co-authored-by: Kai T. Ohlhus <k.ohlhus@gmail.com>
author Andreas Bertsatos <abertsatos@biol.uoa.gr>
date Sun, 13 Nov 2022 01:44:54 +0900
parents 5d3faba0342e
children 9ee8943cad01
comparison
equal deleted inserted replaced
31420:e98fb9b4be86 31421:21962c678648
23 ## 23 ##
24 ######################################################################## 24 ########################################################################
25 25
26 ## -*- texinfo -*- 26 ## -*- texinfo -*-
27 ## @deftypefn {} {@var{y} =} mean (@var{x}) 27 ## @deftypefn {} {@var{y} =} mean (@var{x})
28 ## @deftypefnx {} {@var{y} =} mean (@var{x}, "all")
28 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{dim}) 29 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{dim})
29 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{opt}) 30 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{vecdim})
30 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{dim}, @var{opt})
31 ## @deftypefnx {} {@var{y} =} mean (@dots{}, @var{outtype}) 31 ## @deftypefnx {} {@var{y} =} mean (@dots{}, @var{outtype})
32 ## Compute the mean of the elements of the vector @var{x}. 32 ## @deftypefnx {} {@var{y} =} mean (@dots{}, @var{nanflag})
33 ## 33 ## Compute the mean of the elements of @var{x}.
34 ## The mean is defined as 34 ##
35 ## 35 ## @itemize
36 ## @item
37 ## If @var{x} is a vector, then @code{mean(@var{x})} returns the
38 ## mean of the elements in @var{x} defined as
36 ## @tex 39 ## @tex
37 ## $$ {\rm mean}(x) = \bar{x} = {1\over N} \sum_{i=1}^N x_i $$ 40 ## $$ {\rm mean}(x) = \bar{x} = {1\over N} \sum_{i=1}^N x_i $$
38 ## where $N$ is the number of elements of @var{x}. 41 ## where $N$ is the number of elements of @var{x}.
39 ## 42 ##
40 ## @end tex 43 ## @end tex
46 ## 49 ##
47 ## @noindent 50 ## @noindent
48 ## where @math{N} is the length of the @var{x} vector. 51 ## where @math{N} is the length of the @var{x} vector.
49 ## 52 ##
50 ## @end ifnottex 53 ## @end ifnottex
51 ## If @var{x} is a matrix, compute the mean for each column and return them 54 ##
52 ## in a row vector. 55 ## @item
53 ## 56 ## If @var{x} is a matrix, then @code{mean(@var{x})} returns a row vector
54 ## If the optional argument @var{dim} is given, operate along this dimension. 57 ## with the mean of each columns in @var{x}.
55 ## 58 ##
56 ## The optional argument @var{opt} selects the type of mean to compute. 59 ## @item
57 ## The following options are recognized: 60 ## If @var{x} is a multidimensional array, then @code{mean(@var{x})}
58 ## 61 ## operates along the first nonsingleton dimension of @var{x}.
59 ## @table @asis 62 ## @end itemize
60 ## @item @qcode{"a"} 63 ##
61 ## Compute the (ordinary) arithmetic mean. [default] 64 ## @code{mean(@var{x}, "all")} returns the mean of all the elements in @var{x}.
62 ## 65 ##
63 ## @item @qcode{"g"} 66 ## @code{mean(@var{x}, @var{dim})} returns the mean along the
64 ## Compute the geometric mean. 67 ## operating dimension @var{dim} of @var{x}.
65 ## 68 ##
66 ## @item @qcode{"h"} 69 ## @code{mean(@var{x}, @var{vecdim})} returns the mean over the
67 ## Compute the harmonic mean. 70 ## dimensions specified in the vector @var{vecdim}. For example, if @var{x}
68 ## @end table 71 ## is a 2-by-3-by-4 array, then @code{mean(@var{x}, [1 2])} returns a 1-by-4
69 ## 72 ## array. Each element of the output array is the mean of the elements on
70 ## The optional argument @var{outtype} selects the data type of the 73 ## the corresponding page of @var{x}. NOTE! @var{vecdim} MUST index at least
71 ## output value. The following options are recognized: 74 ## N-2 dimensions of @var{x}, where @code{N = length (size (@var{x}))} and
72 ## 75 ## N < 8. If @var{vecdim} indexes all dimensions of @var{x}, then it is
73 ## @table @asis 76 ## equivalent to @code{mean(@var{x}, "all")}.
74 ## @item @qcode{"default"} 77 ##
75 ## Output will be of class double unless @var{x} is of class single, 78 ## @code{mean(@dots{}, @var{outtype})} returns the mean with a specified data
76 ## in which case the output will also be single. 79 ## type, using any of the input arguments in the previous syntaxes.
77 ## 80 ## @var{outtype} can be "default", "double", or "native".
78 ## @item @qcode{"double"} 81 ##
79 ## Output will be of class double. 82 ## @code{mean(@dots{}, @var{nanflag})} specifies whether to exclude NaN values
80 ## 83 ## from the calculation, using any of the input argument combinations in
81 ## @item @qcode{"native"} 84 ## previous syntaxes. By default, NaN values are included in the calculation
82 ## Output will be the same class as @var{x} unless @var{x} is of class 85 ## (@var{nanflag} has the value "includenan"). To exclude NaN values, set the
83 ## logical in which case it returns of class double. 86 ## value of @var{nanflag} to "omitnan".
84 ## 87 ##
85 ## @end table
86 ##
87 ## Both @var{dim} and @var{opt} are optional. If both are supplied, either
88 ## may appear first.
89 ## @seealso{median, mode} 88 ## @seealso{median, mode}
90 ## @end deftypefn 89 ## @end deftypefn
91 90
92 function y = mean (x, varargin) 91 function y = mean (x, varargin)
93 92
94 if (nargin < 1 || nargin > 4) 93 if (nargin < 1 || nargin > 4 || any (cellfun (@isnumeric, varargin(2:end))))
95 print_usage (); 94 print_usage ();
96 endif 95 endif
97 96
98 if (! (isnumeric (x) || islogical (x))) 97 ## Check all char arguments.
99 error ("mean: X must be a numeric vector or matrix"); 98 all_flag = false;
100 endif 99 omitnan = false;
101 nd = ndims (x); 100 outtype = "default";
102 sz = size (x); 101
103 102 for i = 1:length (varargin)
104 ## We support too many options... 103 if (ischar (varargin{i}))
105 104 switch (varargin{i})
106 ## If OUTTYPE is set, it must be the last option. If DIM and 105 case "all"
107 ## MEAN_TYPE exist, they must be the first two options 106 all_flag = true;
108 107 case "omitnan"
109 out_type = "default"; 108 omitnan = true;
110 if (numel (varargin)) 109 case "includenan"
111 maybe_out_type = tolower (varargin{end}); 110 omitnan = false;
112 if (any (strcmpi (maybe_out_type, {"default", "double", "native"}))) 111 case {"default", "double", "native"}
113 out_type = maybe_out_type; 112 outtype = varargin{i};
114 varargin(end) = []; 113 otherwise
114 print_usage ();
115 endswitch
115 endif 116 endif
116 endif 117 endfor
117 118 varargin(cellfun (@ischar, varargin)) = [];
118 scalars = cellfun (@isscalar, varargin); 119
119 chars = cellfun (@ischar, varargin); 120 if (((length (varargin) == 1) && ! (isnumeric (varargin{1}))) ...
120 numerics = cellfun (@isnumeric, varargin); 121 || (length (varargin) > 1))
121
122 dim_mask = numerics & scalars;
123 mean_type_mask = chars & scalars;
124 if (! all (dim_mask | mean_type_mask))
125 print_usage (); 122 print_usage ();
126 endif 123 endif
127 124
128 switch (nnz (dim_mask)) 125 if (! (isnumeric (x) || islogical (x)))
129 case 0 # Find the first non-singleton dimension 126 error ("mean: X must be either a numeric or boolean vector or matrix");
130 (dim = find (sz > 1, 1)) || (dim = 1); 127 endif
131 case 1 128
132 dim = varargin{dim_mask}; 129 if (length (varargin) == 0)
133 if (dim != fix (dim) || dim < 1) 130
134 error ("mean: DIM must be an integer and a valid dimension"); 131 ## Single numeric input argument, no dimensions given.
135 endif 132 if (all_flag)
136 otherwise 133 n = length (x(:));
137 print_usage (); 134 if (omitnan)
138 endswitch 135 n = length (x(! isnan (x)));
139 136 x(isnan (x)) = 0;
140 switch (nnz (mean_type_mask)) 137 endif
141 case 0 138 y = sum (x(:), 1) ./ n;
142 mean_type = "a"; 139 else
143 case 1 140 sz = size (x);
144 mean_type = varargin{mean_type_mask}; 141 dim = find (sz > 1, 1);
145 otherwise 142 if length (dim) == 0
146 print_usage (); 143 dim = 1;
147 endswitch 144 endif
148 145 n = size (x, dim);
149 ## The actual mean computation 146 if (omitnan)
150 n = size (x, dim); 147 n = sum (! isnan (x), dim);
151 switch (mean_type) 148 x(isnan (x)) = 0;
152 case "a" 149 endif
153 y = sum (x, dim) / n; 150 y = sum (x, dim) ./ n;
154 case "g" 151 endif
155 if (! any (x(:) < 0)) 152
156 y = exp (sum (log (x), dim) ./ n); 153 else
157 else 154
158 error ("mean: X must not contain any negative values"); 155 ## Two numeric input arguments, dimensions given. Note scalar is vector!
159 endif 156 vecdim = varargin{1};
160 case "h" 157 if (! (isvector (vecdim) && all (vecdim)) || any (rem (vecdim, 1)))
161 y = n ./ sum (1 ./ x, dim); 158 error ("mean: Dimension must be a positive integer scalar or vector");
162 otherwise 159 endif
163 error ("mean: mean type '%s' not recognized", mean_type); 160
164 endswitch 161 if (isscalar (vecdim))
162
163 n = size (x, vecdim);
164 if (omitnan)
165 n = sum (! isnan (x), vecdim);
166 x(isnan (x)) = 0;
167 endif
168 y = sum (x, vecdim) ./ n;
169
170 else
171
172 sz = size (x);
173 ndims = length (sz);
174 misdim = [1:ndims];
175
176 ## keep remaining dimensions
177 for i = 1:length (vecdim)
178 misdim(misdim == vecdim(i)) = [];
179 endfor
180
181 switch (length (misdim))
182 ## if all dimensions are given, compute x(:)
183 case 0
184 n = length (x(:));
185 if (omitnan)
186 n = length (x(! isnan (x)));
187 x(isnan (x)) = 0;
188 endif
189 y = sum (x(:), 1) ./ n;
190
191 ## for 1 dimension left, return column vector
192 case 1
193 x = permute (x, [misdim, vecdim]);
194 for i = 1:size (x, 1)
195 x_vec = x(i,:,:,:,:,:,:)(:);
196 if (omitnan)
197 x_vec = x_vec(! isnan (x_vec));
198 endif
199 y(i) = sum (x_vec, 1) ./ length (x_vec);
200 endfor
201
202 ## for 2 dimensions left, return matrix
203 case 2
204 x = permute (x, [misdim, vecdim]);
205 for i = 1:size (x, 1)
206 for j = 1:size (x, 2)
207 x_vec = x(i,j,:,:,:,:,:)(:);
208 if (omitnan)
209 x_vec = x_vec(! isnan (x_vec));
210 endif
211 y(i,j) = sum (x_vec, 1) ./ length (x_vec);
212 endfor
213 endfor
214
215 ## for more that 2 dimensions left, print usage
216 otherwise
217 error ("vecdim must index at least N-2 dimensions of X");
218 endswitch
219 endif
220
221 endif
165 222
166 ## Convert output as requested 223 ## Convert output as requested
167 switch (out_type) 224 switch (outtype)
168 case "default" 225 case "default"
169 ## do nothing, the operators already do the right thing 226 ## do nothing, the operators already do the right thing
170 case "double" 227 case "double"
171 y = double (y); 228 y = double (y);
172 case "native" 229 case "native"
173 if (islogical (x)) 230 if (! islogical (x))
174 ## ignore it, return double anyway
175 else
176 y = cast (y, class (x)); 231 y = cast (y, class (x));
177 endif 232 endif
178 otherwise 233 otherwise
179 ## this should have been filtered out during input check, but... 234 error ("mean: OUTTYPE '%s' not recognized", outtype);
180 error ("mean: OUTTYPE '%s' not recognized", out_type);
181 endswitch 235 endswitch
182 236
183 endfunction 237 endfunction
184 238
185 239
189 %! z = [y, y+10]; 243 %! z = [y, y+10];
190 %! assert (mean (x), 0); 244 %! assert (mean (x), 0);
191 %! assert (mean (y), 0); 245 %! assert (mean (y), 0);
192 %! assert (mean (z), [0, 10]); 246 %! assert (mean (z), [0, 10]);
193 247
194 ## Test small numbers
195 %!assert (mean (repmat (0.1,1,1000), "g"), 0.1, 20*eps)
196
197 %!assert (mean (magic (3), 1), [5, 5, 5]) 248 %!assert (mean (magic (3), 1), [5, 5, 5])
198 %!assert (mean (magic (3), 2), [5; 5; 5]) 249 %!assert (mean (magic (3), 2), [5; 5; 5])
199 %!assert (mean ([2 8], "g"), 4)
200 %!assert (mean ([4 4 2], "h"), 3)
201 %!assert (mean (logical ([1 0 1 1])), 0.75) 250 %!assert (mean (logical ([1 0 1 1])), 0.75)
202 %!assert (mean (single ([1 0 1 1])), single (0.75)) 251 %!assert (mean (single ([1 0 1 1])), single (0.75))
203 %!assert (mean ([1 2], 3), [1 2]) 252 %!assert (mean ([1 2], 3), [1 2])
204 253
205 ## Test input validation 254 ## Test input validation
206 %!error <Invalid call to mean. Correct usage is> mean () 255 %!error <Invalid call to mean. Correct usage is> mean ()
256 %!error <Invalid call to mean. Correct usage is> mean (1, 2, 3)
207 %!error <Invalid call to mean. Correct usage is> mean (1, 2, 3, 4) 257 %!error <Invalid call to mean. Correct usage is> mean (1, 2, 3, 4)
208 %!error <X must be a numeric> mean ({1:5}) 258 %!error <Invalid call to mean. Correct usage is> mean (1, "all", 3)
209 %!error <Invalid call to mean. Correct usage is> mean (1, 2, 3) 259 %!error <Invalid call to mean. Correct usage is> mean (1, "b")
210 %!error <Invalid call to mean. Correct usage is> mean (1, ones (2,2))
211 %!error <DIM must be an integer> mean (1, 1.5)
212 %!error <DIM must be .* a valid dimension> mean (1, 0)
213 %!error <X must not contain any negative values> mean ([1 -1], "g")
214 %!error <mean type 'b' not recognized> mean (1, "b")
215 %!error <Invalid call to mean. Correct usage is> mean (1, 1, "foo") 260 %!error <Invalid call to mean. Correct usage is> mean (1, 1, "foo")
261 %!error <X must be either a numeric or boolean> mean ({1:5})
262 %!error <X must be either a numeric or boolean> mean ("char")
263 %!error <Dimension must be a positive integer> mean (1, ones (2,2))
264 %!error <Dimension must be a positive integer> mean (1, 1.5)
265 %!error <Dimension must be a positive integer> mean (1, 0)
266 %!error <vecdim must index at least N-2 dimensions of X> ...
267 %! mean (repmat ([1:20;6:25], [5 2 6 3 5]), [1 2])
216 268
217 ## Test outtype option 269 ## Test outtype option
218 %!test 270 %!test
219 %! in = [1 2 3]; 271 %! in = [1 2 3];
220 %! out = 2; 272 %! out = 2;
238 %! in = logical ([1 0 1]); 290 %! in = logical ([1 0 1]);
239 %! out = 2/3; 291 %! out = 2/3;
240 %! assert (mean (in, "default"), mean (in)); 292 %! assert (mean (in, "default"), mean (in));
241 %! assert (mean (in, "default"), out); 293 %! assert (mean (in, "default"), out);
242 %! assert (mean (in, "native"), out); # logical ignores native option 294 %! assert (mean (in, "native"), out); # logical ignores native option
295
296 ## Test single input and optional arguments "all", DIM, "omitnan")
297 %!test
298 %! x = [-10:10];
299 %! y = [x;x+5;x-5];
300 %! assert (mean (x), 0);
301 %! assert (mean (y, 2), [0, 5, -5]');
302 %! assert (mean (y, "all"), 0);
303 %! y(2,4) = NaN;
304 %! assert (mean (y', "omitnan"), [0 5.35 -5]);
305 %! z = y + 20;
306 %! assert (mean (z, "all"), NaN);
307 %! m = [20 NaN 15];
308 %! assert (mean (z'), m);
309 %! assert (mean (z', "includenan"), m);
310 %! m = [20 25.35 15];
311 %! assert (mean (z', "omitnan"), m);
312 %! assert (mean (z, 2, "omitnan"), m');
313 %! assert (mean (z, 2, "native", "omitnan"), m');
314 %! assert (mean (z, 2, "omitnan", "native"), m');
315
316 ## Test boolean input
317 %!test
318 %! assert (mean (true, "all"), 1);
319 %! assert (mean (false), 0);
320 %! assert (mean ([true false true]), 2/3, 4e-14);
321 %! assert (mean ([true false true], 1), [1 0 1]);
322 %! assert (mean ([true false NaN], 1), [1 0 NaN]);
323 %! assert (mean ([true false NaN], 2), NaN);
324 %! assert (mean ([true false NaN], 2, "omitnan"), 0.5);
325 %! assert (mean ([true false NaN], 2, "omitnan", "native"), 0.5);
326
327 ## Test dimension indexing with vecdim in n-dimensional arrays
328 %!test
329 %! x = repmat ([1:20;6:25], [5 2 6 3]);
330 %! assert (size (mean (x, [3 2])), [10 3]);
331 %! assert (size (mean (x, [1 2])), [6 3]);
332 %! assert (size (mean (x, [1 2 4])), [1 6]);
333 %! assert (size (mean (x, [1 4 3])), [1 40]);
334 %! assert (size (mean (x, [1 2 3 4])), [1 1]);
335
336 ## Test results with vecdim in n-dimensional arrays and "omitnan"
337 %!test
338 %! x = repmat ([1:20;6:25], [5 2 6 3]);
339 %! m = repmat ([10.5;15.5], [5,3]);
340 %! assert (mean (x, [3 2]), m, 4e-14);
341 %! x(2,5,6,3) = NaN;
342 %! m(2,3) = NaN;
343 %! assert (mean (x, [3 2]), m, 4e-14);
344 %! m(2,3) = 15.52301255230125;
345 %! assert (mean (x, [3 2], "omitnan"), m, 4e-14);