Mercurial > octave
comparison scripts/statistics/mean.m @ 31421:21962c678648
mean.m: more Matlab compatibility (bugs #58116, #50571).
Background: This improved function was released with the Statistics Package.
* scripts/statistics/mean.m: now accepts vector dimensions and options to
handle `NaN` values. The option `"a"` (arithmetic mean), `"g"` (geometric
mean), and `"h"` (harmonic mean) are no longer accepted, only the arithmetic
mean is computed. For the geometric and harmonic mean, please use respective
functions `geomean` and `harmmean` from the Octave Statistics package.
* etc/NEWS.8.md: announce changes.
Co-authored-by: Kai T. Ohlhus <k.ohlhus@gmail.com>
author | Andreas Bertsatos <abertsatos@biol.uoa.gr> |
---|---|
date | Sun, 13 Nov 2022 01:44:54 +0900 |
parents | 5d3faba0342e |
children | 9ee8943cad01 |
comparison
equal
deleted
inserted
replaced
31420:e98fb9b4be86 | 31421:21962c678648 |
---|---|
23 ## | 23 ## |
24 ######################################################################## | 24 ######################################################################## |
25 | 25 |
26 ## -*- texinfo -*- | 26 ## -*- texinfo -*- |
27 ## @deftypefn {} {@var{y} =} mean (@var{x}) | 27 ## @deftypefn {} {@var{y} =} mean (@var{x}) |
28 ## @deftypefnx {} {@var{y} =} mean (@var{x}, "all") | |
28 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{dim}) | 29 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{dim}) |
29 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{opt}) | 30 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{vecdim}) |
30 ## @deftypefnx {} {@var{y} =} mean (@var{x}, @var{dim}, @var{opt}) | |
31 ## @deftypefnx {} {@var{y} =} mean (@dots{}, @var{outtype}) | 31 ## @deftypefnx {} {@var{y} =} mean (@dots{}, @var{outtype}) |
32 ## Compute the mean of the elements of the vector @var{x}. | 32 ## @deftypefnx {} {@var{y} =} mean (@dots{}, @var{nanflag}) |
33 ## | 33 ## Compute the mean of the elements of @var{x}. |
34 ## The mean is defined as | 34 ## |
35 ## | 35 ## @itemize |
36 ## @item | |
37 ## If @var{x} is a vector, then @code{mean(@var{x})} returns the | |
38 ## mean of the elements in @var{x} defined as | |
36 ## @tex | 39 ## @tex |
37 ## $$ {\rm mean}(x) = \bar{x} = {1\over N} \sum_{i=1}^N x_i $$ | 40 ## $$ {\rm mean}(x) = \bar{x} = {1\over N} \sum_{i=1}^N x_i $$ |
38 ## where $N$ is the number of elements of @var{x}. | 41 ## where $N$ is the number of elements of @var{x}. |
39 ## | 42 ## |
40 ## @end tex | 43 ## @end tex |
46 ## | 49 ## |
47 ## @noindent | 50 ## @noindent |
48 ## where @math{N} is the length of the @var{x} vector. | 51 ## where @math{N} is the length of the @var{x} vector. |
49 ## | 52 ## |
50 ## @end ifnottex | 53 ## @end ifnottex |
51 ## If @var{x} is a matrix, compute the mean for each column and return them | 54 ## |
52 ## in a row vector. | 55 ## @item |
53 ## | 56 ## If @var{x} is a matrix, then @code{mean(@var{x})} returns a row vector |
54 ## If the optional argument @var{dim} is given, operate along this dimension. | 57 ## with the mean of each columns in @var{x}. |
55 ## | 58 ## |
56 ## The optional argument @var{opt} selects the type of mean to compute. | 59 ## @item |
57 ## The following options are recognized: | 60 ## If @var{x} is a multidimensional array, then @code{mean(@var{x})} |
58 ## | 61 ## operates along the first nonsingleton dimension of @var{x}. |
59 ## @table @asis | 62 ## @end itemize |
60 ## @item @qcode{"a"} | 63 ## |
61 ## Compute the (ordinary) arithmetic mean. [default] | 64 ## @code{mean(@var{x}, "all")} returns the mean of all the elements in @var{x}. |
62 ## | 65 ## |
63 ## @item @qcode{"g"} | 66 ## @code{mean(@var{x}, @var{dim})} returns the mean along the |
64 ## Compute the geometric mean. | 67 ## operating dimension @var{dim} of @var{x}. |
65 ## | 68 ## |
66 ## @item @qcode{"h"} | 69 ## @code{mean(@var{x}, @var{vecdim})} returns the mean over the |
67 ## Compute the harmonic mean. | 70 ## dimensions specified in the vector @var{vecdim}. For example, if @var{x} |
68 ## @end table | 71 ## is a 2-by-3-by-4 array, then @code{mean(@var{x}, [1 2])} returns a 1-by-4 |
69 ## | 72 ## array. Each element of the output array is the mean of the elements on |
70 ## The optional argument @var{outtype} selects the data type of the | 73 ## the corresponding page of @var{x}. NOTE! @var{vecdim} MUST index at least |
71 ## output value. The following options are recognized: | 74 ## N-2 dimensions of @var{x}, where @code{N = length (size (@var{x}))} and |
72 ## | 75 ## N < 8. If @var{vecdim} indexes all dimensions of @var{x}, then it is |
73 ## @table @asis | 76 ## equivalent to @code{mean(@var{x}, "all")}. |
74 ## @item @qcode{"default"} | 77 ## |
75 ## Output will be of class double unless @var{x} is of class single, | 78 ## @code{mean(@dots{}, @var{outtype})} returns the mean with a specified data |
76 ## in which case the output will also be single. | 79 ## type, using any of the input arguments in the previous syntaxes. |
77 ## | 80 ## @var{outtype} can be "default", "double", or "native". |
78 ## @item @qcode{"double"} | 81 ## |
79 ## Output will be of class double. | 82 ## @code{mean(@dots{}, @var{nanflag})} specifies whether to exclude NaN values |
80 ## | 83 ## from the calculation, using any of the input argument combinations in |
81 ## @item @qcode{"native"} | 84 ## previous syntaxes. By default, NaN values are included in the calculation |
82 ## Output will be the same class as @var{x} unless @var{x} is of class | 85 ## (@var{nanflag} has the value "includenan"). To exclude NaN values, set the |
83 ## logical in which case it returns of class double. | 86 ## value of @var{nanflag} to "omitnan". |
84 ## | 87 ## |
85 ## @end table | |
86 ## | |
87 ## Both @var{dim} and @var{opt} are optional. If both are supplied, either | |
88 ## may appear first. | |
89 ## @seealso{median, mode} | 88 ## @seealso{median, mode} |
90 ## @end deftypefn | 89 ## @end deftypefn |
91 | 90 |
92 function y = mean (x, varargin) | 91 function y = mean (x, varargin) |
93 | 92 |
94 if (nargin < 1 || nargin > 4) | 93 if (nargin < 1 || nargin > 4 || any (cellfun (@isnumeric, varargin(2:end)))) |
95 print_usage (); | 94 print_usage (); |
96 endif | 95 endif |
97 | 96 |
98 if (! (isnumeric (x) || islogical (x))) | 97 ## Check all char arguments. |
99 error ("mean: X must be a numeric vector or matrix"); | 98 all_flag = false; |
100 endif | 99 omitnan = false; |
101 nd = ndims (x); | 100 outtype = "default"; |
102 sz = size (x); | 101 |
103 | 102 for i = 1:length (varargin) |
104 ## We support too many options... | 103 if (ischar (varargin{i})) |
105 | 104 switch (varargin{i}) |
106 ## If OUTTYPE is set, it must be the last option. If DIM and | 105 case "all" |
107 ## MEAN_TYPE exist, they must be the first two options | 106 all_flag = true; |
108 | 107 case "omitnan" |
109 out_type = "default"; | 108 omitnan = true; |
110 if (numel (varargin)) | 109 case "includenan" |
111 maybe_out_type = tolower (varargin{end}); | 110 omitnan = false; |
112 if (any (strcmpi (maybe_out_type, {"default", "double", "native"}))) | 111 case {"default", "double", "native"} |
113 out_type = maybe_out_type; | 112 outtype = varargin{i}; |
114 varargin(end) = []; | 113 otherwise |
114 print_usage (); | |
115 endswitch | |
115 endif | 116 endif |
116 endif | 117 endfor |
117 | 118 varargin(cellfun (@ischar, varargin)) = []; |
118 scalars = cellfun (@isscalar, varargin); | 119 |
119 chars = cellfun (@ischar, varargin); | 120 if (((length (varargin) == 1) && ! (isnumeric (varargin{1}))) ... |
120 numerics = cellfun (@isnumeric, varargin); | 121 || (length (varargin) > 1)) |
121 | |
122 dim_mask = numerics & scalars; | |
123 mean_type_mask = chars & scalars; | |
124 if (! all (dim_mask | mean_type_mask)) | |
125 print_usage (); | 122 print_usage (); |
126 endif | 123 endif |
127 | 124 |
128 switch (nnz (dim_mask)) | 125 if (! (isnumeric (x) || islogical (x))) |
129 case 0 # Find the first non-singleton dimension | 126 error ("mean: X must be either a numeric or boolean vector or matrix"); |
130 (dim = find (sz > 1, 1)) || (dim = 1); | 127 endif |
131 case 1 | 128 |
132 dim = varargin{dim_mask}; | 129 if (length (varargin) == 0) |
133 if (dim != fix (dim) || dim < 1) | 130 |
134 error ("mean: DIM must be an integer and a valid dimension"); | 131 ## Single numeric input argument, no dimensions given. |
135 endif | 132 if (all_flag) |
136 otherwise | 133 n = length (x(:)); |
137 print_usage (); | 134 if (omitnan) |
138 endswitch | 135 n = length (x(! isnan (x))); |
139 | 136 x(isnan (x)) = 0; |
140 switch (nnz (mean_type_mask)) | 137 endif |
141 case 0 | 138 y = sum (x(:), 1) ./ n; |
142 mean_type = "a"; | 139 else |
143 case 1 | 140 sz = size (x); |
144 mean_type = varargin{mean_type_mask}; | 141 dim = find (sz > 1, 1); |
145 otherwise | 142 if length (dim) == 0 |
146 print_usage (); | 143 dim = 1; |
147 endswitch | 144 endif |
148 | 145 n = size (x, dim); |
149 ## The actual mean computation | 146 if (omitnan) |
150 n = size (x, dim); | 147 n = sum (! isnan (x), dim); |
151 switch (mean_type) | 148 x(isnan (x)) = 0; |
152 case "a" | 149 endif |
153 y = sum (x, dim) / n; | 150 y = sum (x, dim) ./ n; |
154 case "g" | 151 endif |
155 if (! any (x(:) < 0)) | 152 |
156 y = exp (sum (log (x), dim) ./ n); | 153 else |
157 else | 154 |
158 error ("mean: X must not contain any negative values"); | 155 ## Two numeric input arguments, dimensions given. Note scalar is vector! |
159 endif | 156 vecdim = varargin{1}; |
160 case "h" | 157 if (! (isvector (vecdim) && all (vecdim)) || any (rem (vecdim, 1))) |
161 y = n ./ sum (1 ./ x, dim); | 158 error ("mean: Dimension must be a positive integer scalar or vector"); |
162 otherwise | 159 endif |
163 error ("mean: mean type '%s' not recognized", mean_type); | 160 |
164 endswitch | 161 if (isscalar (vecdim)) |
162 | |
163 n = size (x, vecdim); | |
164 if (omitnan) | |
165 n = sum (! isnan (x), vecdim); | |
166 x(isnan (x)) = 0; | |
167 endif | |
168 y = sum (x, vecdim) ./ n; | |
169 | |
170 else | |
171 | |
172 sz = size (x); | |
173 ndims = length (sz); | |
174 misdim = [1:ndims]; | |
175 | |
176 ## keep remaining dimensions | |
177 for i = 1:length (vecdim) | |
178 misdim(misdim == vecdim(i)) = []; | |
179 endfor | |
180 | |
181 switch (length (misdim)) | |
182 ## if all dimensions are given, compute x(:) | |
183 case 0 | |
184 n = length (x(:)); | |
185 if (omitnan) | |
186 n = length (x(! isnan (x))); | |
187 x(isnan (x)) = 0; | |
188 endif | |
189 y = sum (x(:), 1) ./ n; | |
190 | |
191 ## for 1 dimension left, return column vector | |
192 case 1 | |
193 x = permute (x, [misdim, vecdim]); | |
194 for i = 1:size (x, 1) | |
195 x_vec = x(i,:,:,:,:,:,:)(:); | |
196 if (omitnan) | |
197 x_vec = x_vec(! isnan (x_vec)); | |
198 endif | |
199 y(i) = sum (x_vec, 1) ./ length (x_vec); | |
200 endfor | |
201 | |
202 ## for 2 dimensions left, return matrix | |
203 case 2 | |
204 x = permute (x, [misdim, vecdim]); | |
205 for i = 1:size (x, 1) | |
206 for j = 1:size (x, 2) | |
207 x_vec = x(i,j,:,:,:,:,:)(:); | |
208 if (omitnan) | |
209 x_vec = x_vec(! isnan (x_vec)); | |
210 endif | |
211 y(i,j) = sum (x_vec, 1) ./ length (x_vec); | |
212 endfor | |
213 endfor | |
214 | |
215 ## for more that 2 dimensions left, print usage | |
216 otherwise | |
217 error ("vecdim must index at least N-2 dimensions of X"); | |
218 endswitch | |
219 endif | |
220 | |
221 endif | |
165 | 222 |
166 ## Convert output as requested | 223 ## Convert output as requested |
167 switch (out_type) | 224 switch (outtype) |
168 case "default" | 225 case "default" |
169 ## do nothing, the operators already do the right thing | 226 ## do nothing, the operators already do the right thing |
170 case "double" | 227 case "double" |
171 y = double (y); | 228 y = double (y); |
172 case "native" | 229 case "native" |
173 if (islogical (x)) | 230 if (! islogical (x)) |
174 ## ignore it, return double anyway | |
175 else | |
176 y = cast (y, class (x)); | 231 y = cast (y, class (x)); |
177 endif | 232 endif |
178 otherwise | 233 otherwise |
179 ## this should have been filtered out during input check, but... | 234 error ("mean: OUTTYPE '%s' not recognized", outtype); |
180 error ("mean: OUTTYPE '%s' not recognized", out_type); | |
181 endswitch | 235 endswitch |
182 | 236 |
183 endfunction | 237 endfunction |
184 | 238 |
185 | 239 |
189 %! z = [y, y+10]; | 243 %! z = [y, y+10]; |
190 %! assert (mean (x), 0); | 244 %! assert (mean (x), 0); |
191 %! assert (mean (y), 0); | 245 %! assert (mean (y), 0); |
192 %! assert (mean (z), [0, 10]); | 246 %! assert (mean (z), [0, 10]); |
193 | 247 |
194 ## Test small numbers | |
195 %!assert (mean (repmat (0.1,1,1000), "g"), 0.1, 20*eps) | |
196 | |
197 %!assert (mean (magic (3), 1), [5, 5, 5]) | 248 %!assert (mean (magic (3), 1), [5, 5, 5]) |
198 %!assert (mean (magic (3), 2), [5; 5; 5]) | 249 %!assert (mean (magic (3), 2), [5; 5; 5]) |
199 %!assert (mean ([2 8], "g"), 4) | |
200 %!assert (mean ([4 4 2], "h"), 3) | |
201 %!assert (mean (logical ([1 0 1 1])), 0.75) | 250 %!assert (mean (logical ([1 0 1 1])), 0.75) |
202 %!assert (mean (single ([1 0 1 1])), single (0.75)) | 251 %!assert (mean (single ([1 0 1 1])), single (0.75)) |
203 %!assert (mean ([1 2], 3), [1 2]) | 252 %!assert (mean ([1 2], 3), [1 2]) |
204 | 253 |
205 ## Test input validation | 254 ## Test input validation |
206 %!error <Invalid call to mean. Correct usage is> mean () | 255 %!error <Invalid call to mean. Correct usage is> mean () |
256 %!error <Invalid call to mean. Correct usage is> mean (1, 2, 3) | |
207 %!error <Invalid call to mean. Correct usage is> mean (1, 2, 3, 4) | 257 %!error <Invalid call to mean. Correct usage is> mean (1, 2, 3, 4) |
208 %!error <X must be a numeric> mean ({1:5}) | 258 %!error <Invalid call to mean. Correct usage is> mean (1, "all", 3) |
209 %!error <Invalid call to mean. Correct usage is> mean (1, 2, 3) | 259 %!error <Invalid call to mean. Correct usage is> mean (1, "b") |
210 %!error <Invalid call to mean. Correct usage is> mean (1, ones (2,2)) | |
211 %!error <DIM must be an integer> mean (1, 1.5) | |
212 %!error <DIM must be .* a valid dimension> mean (1, 0) | |
213 %!error <X must not contain any negative values> mean ([1 -1], "g") | |
214 %!error <mean type 'b' not recognized> mean (1, "b") | |
215 %!error <Invalid call to mean. Correct usage is> mean (1, 1, "foo") | 260 %!error <Invalid call to mean. Correct usage is> mean (1, 1, "foo") |
261 %!error <X must be either a numeric or boolean> mean ({1:5}) | |
262 %!error <X must be either a numeric or boolean> mean ("char") | |
263 %!error <Dimension must be a positive integer> mean (1, ones (2,2)) | |
264 %!error <Dimension must be a positive integer> mean (1, 1.5) | |
265 %!error <Dimension must be a positive integer> mean (1, 0) | |
266 %!error <vecdim must index at least N-2 dimensions of X> ... | |
267 %! mean (repmat ([1:20;6:25], [5 2 6 3 5]), [1 2]) | |
216 | 268 |
217 ## Test outtype option | 269 ## Test outtype option |
218 %!test | 270 %!test |
219 %! in = [1 2 3]; | 271 %! in = [1 2 3]; |
220 %! out = 2; | 272 %! out = 2; |
238 %! in = logical ([1 0 1]); | 290 %! in = logical ([1 0 1]); |
239 %! out = 2/3; | 291 %! out = 2/3; |
240 %! assert (mean (in, "default"), mean (in)); | 292 %! assert (mean (in, "default"), mean (in)); |
241 %! assert (mean (in, "default"), out); | 293 %! assert (mean (in, "default"), out); |
242 %! assert (mean (in, "native"), out); # logical ignores native option | 294 %! assert (mean (in, "native"), out); # logical ignores native option |
295 | |
296 ## Test single input and optional arguments "all", DIM, "omitnan") | |
297 %!test | |
298 %! x = [-10:10]; | |
299 %! y = [x;x+5;x-5]; | |
300 %! assert (mean (x), 0); | |
301 %! assert (mean (y, 2), [0, 5, -5]'); | |
302 %! assert (mean (y, "all"), 0); | |
303 %! y(2,4) = NaN; | |
304 %! assert (mean (y', "omitnan"), [0 5.35 -5]); | |
305 %! z = y + 20; | |
306 %! assert (mean (z, "all"), NaN); | |
307 %! m = [20 NaN 15]; | |
308 %! assert (mean (z'), m); | |
309 %! assert (mean (z', "includenan"), m); | |
310 %! m = [20 25.35 15]; | |
311 %! assert (mean (z', "omitnan"), m); | |
312 %! assert (mean (z, 2, "omitnan"), m'); | |
313 %! assert (mean (z, 2, "native", "omitnan"), m'); | |
314 %! assert (mean (z, 2, "omitnan", "native"), m'); | |
315 | |
316 ## Test boolean input | |
317 %!test | |
318 %! assert (mean (true, "all"), 1); | |
319 %! assert (mean (false), 0); | |
320 %! assert (mean ([true false true]), 2/3, 4e-14); | |
321 %! assert (mean ([true false true], 1), [1 0 1]); | |
322 %! assert (mean ([true false NaN], 1), [1 0 NaN]); | |
323 %! assert (mean ([true false NaN], 2), NaN); | |
324 %! assert (mean ([true false NaN], 2, "omitnan"), 0.5); | |
325 %! assert (mean ([true false NaN], 2, "omitnan", "native"), 0.5); | |
326 | |
327 ## Test dimension indexing with vecdim in n-dimensional arrays | |
328 %!test | |
329 %! x = repmat ([1:20;6:25], [5 2 6 3]); | |
330 %! assert (size (mean (x, [3 2])), [10 3]); | |
331 %! assert (size (mean (x, [1 2])), [6 3]); | |
332 %! assert (size (mean (x, [1 2 4])), [1 6]); | |
333 %! assert (size (mean (x, [1 4 3])), [1 40]); | |
334 %! assert (size (mean (x, [1 2 3 4])), [1 1]); | |
335 | |
336 ## Test results with vecdim in n-dimensional arrays and "omitnan" | |
337 %!test | |
338 %! x = repmat ([1:20;6:25], [5 2 6 3]); | |
339 %! m = repmat ([10.5;15.5], [5,3]); | |
340 %! assert (mean (x, [3 2]), m, 4e-14); | |
341 %! x(2,5,6,3) = NaN; | |
342 %! m(2,3) = NaN; | |
343 %! assert (mean (x, [3 2]), m, 4e-14); | |
344 %! m(2,3) = 15.52301255230125; | |
345 %! assert (mean (x, [3 2], "omitnan"), m, 4e-14); |