Mercurial > octave
changeset 29772:fdbba73edde2
iqr.m: Overhaul function (bug #59636).
* iqr.m: Use "quantile" to calculate interquartile range. Extend dimension
input to allow "all" and vector input. Increase performance. Improve Matlab
compatibility. Add and update tests.
* NEWS: Add note about change.
author | Nicholas R. Jankowski <jankowskin@asme.org> |
---|---|
date | Fri, 11 Dec 2020 14:45:39 -0500 |
parents | 42dc5cf93f83 |
children | 5f404ae822ee |
files | NEWS scripts/statistics/iqr.m |
diffstat | 2 files changed, 216 insertions(+), 44 deletions(-) [+] |
line wrap: on
line diff
--- a/NEWS Thu Jun 17 18:26:29 2021 +0200 +++ b/NEWS Fri Dec 11 14:45:39 2020 -0500 @@ -196,6 +196,11 @@ Matlab code, but for which Octave does not yet implement the functionality. By default, this warning is enabled. +- The function `iqr` now uses Matlab compatible interpolation for +quantile values. Dimension input now allows a vector, "all", and +dimensions greater than `ndims (x)`. The function compatibly handles +`Inf` and `NaN` input values. + ### Alphabetical list of new functions added in Octave 7 * `cospi`
--- a/scripts/statistics/iqr.m Thu Jun 17 18:26:29 2021 +0200 +++ b/scripts/statistics/iqr.m Fri Dec 11 14:45:39 2020 -0500 @@ -24,80 +24,247 @@ ######################################################################## ## -*- texinfo -*- -## @deftypefn {} {} iqr (@var{x}) -## @deftypefnx {} {} iqr (@var{x}, @var{dim}) -## Return the interquartile range, i.e., the difference between the upper -## and lower quartile of the input data. +## @deftypefn {} {@var{Z} =} iqr (@var{x}) +## @deftypefnx {} {@var{Z} =} iqr (@var{x}, @var{dim}) +## @deftypefnx {} {@var{Z} =} iqr (@var{x}, @qcode{"ALL"}) +## Return the interquartile range of @var{x}, defined as the distance between +## the 25th and 75th percentile values of @var{x} calculated using: +## quantile (x, [0.25 0.75]) ## -## If @var{x} is a matrix, do the above for first non-singleton dimension of +## If @var{x} is a vector, @code{iqr (@var{x})} will operate on the data in ## @var{x}. ## -## If the optional argument @var{dim} is given, operate along this dimension. +## If @var{x} is a matrix, @code{iqr (@var{x})} will operate independently on +## each column in @var{x} returning a row vector @var{Z}. +## +## If @var{x} is a n-dimensional array, @code{iqr (@var{x})} will operate +## independently on the first non-singleton dimension in @var{x}, returning an +## array @var{Z} the same shape as @var{x} with the non-singleton dimenion +## reduced to 1. +## +## The optional variable @var{dim} can be used to force @code{iqr} to operate +## over the specified dimension. @var{dim} can either be a scalar dimension or +## a vector of non-repeating dimensions over which to operate. In either case +## @var{dim} must be positive integers. A vector @var{dim} concatenates all +## specified diminsions for independent operation by @code{iqr}. +## +## Specifying dimension @qcode{"ALL"} will force @code{iqr} to operate +## on all elements of @var{x}, and is equivalent to @code{iqr (@var{x}(:))}. +## Similarly, specifying a vector dimension including all non-singleton +## dimensions of @var{x} is equivalent to @code{iqr (@var{x}, @qcode{"ALL"})}. +## +## If @var{x} is a scalar, or only singleton dimensions are specified for +## @var{dim}, the output will be @code{zeros (size (@var{x}))}. ## ## As a measure of dispersion, the interquartile range is less affected by ## outliers than either @code{range} or @code{std}. -## @seealso{bounds, mad, range, std} +## +## @seealso{bounds, mad, range, std, prctile, quantile} ## @end deftypefn -function y = iqr (x, dim) +## TODO: When Probability Distribution Objects are implemented, enable +## handling for those object types. +function z = iqr (x, dim) + + ## input checks if (nargin < 1) print_usage (); + elseif (nargin < 2) + dim = []; endif if (! (isnumeric (x) || islogical (x))) error ("iqr: X must be a numeric vector or matrix"); endif + vecdim_flag = false; nd = ndims (x); sz = size (x); - nel = numel (x); - if (nargin != 2) - ## Find the first non-singleton dimension. - (dim = find (sz > 1, 1)) || (dim = 1); + + if isempty (dim) + ## Find first non-singleton dimension. + if (max (sz) == 1) + dim = 2; + else + dim = find ((sz > 1), 1); + endif else - if (!(isscalar (dim) && dim == fix (dim)) - || !(1 <= dim && dim <= nd)) - error ("iqr: DIM must be an integer and a valid dimension"); + + if (isvector (dim) && isnumeric (dim) && all (dim > 0) && all (rem (dim, 1) == 0)) + + if (((num_vecdims = numel (dim)) > 1) ... + && all (diff (sort (dim)))) + ## DIM must be 1D and non repeating. + + ## Detect trivial case of DIM being all dimensions (same as "all"). + highest_dim = (max (nd, max (dim))); + if ((num_vecdims == nd) && (highest_dim == nd)) + x = x(:); + sz = size (x); + dim = 1; + else + ## Move dimensions for operation to the front, keeping the order of + ## the remaining dimensions. + ## Reshape those into a single dimension. + ## Process as normal for a dim1 iqr on X, reshape when done. + + vecdim_flag = true; ## flag for final reshape + + if (iscolumn (dim)) + dim = dim.'; + endif + + ## Permutation vector with DIM at front + perm = [1:highest_dim]; + perm(dim) = []; + perm = [dim, perm]; + + ## Reshape X to put dims to process at front. + x = permute (x, perm); + sz_x_new = size (x); + + ## Preserve trailing singletons when dim > ndims (x). + sz_x_new = [sz_x_new, ones(1, highest_dim - numel (sz_x_new))]; + + newshape = [prod(sz_x_new(1:num_vecdims)), ... + ones(1, (num_vecdims-1)), ... + sz_x_new((num_vecdims+1):end)]; + + if (numel (newshape) == 1) + newshape = [newshape, 1]; + endif + + ## Collapse dimensions to be processses into single column. + x = reshape (x, newshape); + + ## Operate column-wise. + dim = 1; + endif + + elseif (! isscalar (dim)) + error ("iqr: vector DIM must contain non-repeating positive integers"); + endif + + elseif (strcmp (tolower (dim), "all")) + ## "ALL" simplifies to collapsing all elements to single vector + x = x(:); + dim = 1; + sz = size (x); + + else + error ("iqr: DIM must be a positive integer scalar, vector, or 'all'"); endif + endif - ## This code is a bit heavy, but is needed until empirical_inv - ## can take a matrix, rather than just a vector argument. - n = sz(dim); - sz(dim) = 1; - if (isa (x, "single")) - y = zeros (sz, "single"); + if (((dim > nd) || (sz(dim) == 1)) && all (isfinite (x))) + ## shortcut easy zeros + z = zeros (sz); + elseif (iscolumn (x) && (dim == 1)) + ## detect col vector with quantile/diff dim requirement mismatch + z = abs (diff (quantile (x, [0.25, 0.75], 1), [], 2)); else - y = zeros (sz); + z = abs (diff (quantile (x, [0.25, 0.75], dim), [], dim)); endif - stride = prod (sz(1:dim-1)); - for i = 1 : nel / n - offset = i; - offset2 = 0; - while (offset > stride) - offset -= stride; - offset2 += 1; - endwhile - offset += offset2 * stride * n; - rng = [0 : n-1] * stride + offset; - y(i) = diff (empirical_inv ([1/4, 3/4], x(rng))); - endfor + if (vecdim_flag) + z = ipermute (z, perm); + endif endfunction +%!assert (iqr (17), 0) +%!assert (iqr (17, 1), 0) +%!assert (iqr (17, 4), 0) +%!assert (iqr (1:3), 1.5) +%!assert (iqr (1:4), 2) +%!assert (iqr (1:5), 2.5) +%!assert (iqr (1:10), 5) +%!assert (iqr ((1:10).'), 5) +%!assert (iqr (1:10, 2), 5) +%!assert (iqr (1:10, 1), zeros(1, 10)) +%!assert (iqr (1:10, 3), zeros(1, 10)) +%!assert (iqr ([1:5; 2:6], "all"), 3) -%!assert (iqr (1:101), 50) -%!assert (iqr (single (1:101)), single (50)) +%!test +%! x = reshape (1:6, [1 2 3]); +%! assert (iqr (x), ones (1, 1, 3)); +%! assert (iqr (x, 1), zeros (1, 2, 3)); +%! assert (iqr (x, 2), ones( 1, 1, 3)); +%! assert (iqr (x, 3), [3 3]); -## FIXME: iqr throws horrible error when running across a dimension that is 1. +## n-D arrays +%!test +%! x = magic (4); x = cat (3,x, 2*x, 3*x); x = cat (4, x, 2*x); +%! y = cat (3, 8*[1 1 1 1], 16*[1 1 1 1], 24*[1 1 1 1]); +%! assert (iqr (x), cat (4, y, 2*y)); +%! assert (iqr (x, 1), cat (4, y, 2*y)); +%! y = cat (3, 4*[3 1 1 3].', 8*[3 1 1 3].', 12*[3 1 1 3].'); +%! assert (iqr (x, 2), cat (4, y, 2*y)); +%! y = [24 3 4.5 19.5; 7.5 16.5 15 12; 13.5 10.5 9, 18; 6 21 22.5 1.5]; +%! assert (iqr (x, 3), cat (4, y, 2*y)); +%! y = [16 2 3 13; 5 11 10 8; 9 7 6 12; 4 14 15 1]; +%! assert (iqr (x, 4), cat (3, y, 2*y, 3*y)); +%! assert (iqr (x, 5), zeros (size (x))); + +## vector dimensions +%!assert (iqr (17, [1 8]), 0) +%!assert (iqr ([[1 2 5]; [2 5 6]], [1 2]), 3) +%!assert (iqr (cat (3, [1 2 5; 2 5 6], [1 2 5; 2 5 6]), [1 2]), cat(3, 3, 3)) +%!assert (iqr (cat (3, [1 2 5; 2 5 6], [1 2 5; 2 5 6]), [1 2]'), cat(3, 3, 3)) %!test -%! x = [1:100]'; -%! assert (iqr (x, 1), 50); -%! assert (iqr (x', 2), 50); +%! x = magic (4); x = cat (3, x, 2*x, 3*x); x = cat (4, x, 2*x); +%! y = cat (3, 8, 16, 24); +%! assert (iqr (x, [1 2]), cat (4, y, 2*y)); +%! y = [14, 18.5, 17.5 19.5]; +%! assert (iqr (x, [1 3]), cat (4, y, 2*y)); +%! y = [10.5 12.5 11.5 15.0000]; +%! assert (iqr (x, [1 4]), cat (3, y, 2*y, 3*y)); +%! assert (iqr (x, [1 5]), iqr (x, 1)); +%! y = [24 13 12 25.5]'; +%! assert (iqr (x, [2 3]), cat (4, y, 2*y)); +%! y = [17.5, 9, 8, 18.5]'; +%! assert (iqr (x, [2 4]), cat (3, y, 2*y, 3*y)); +%! assert (iqr (x, [3 4]), [32 4 6 26; 10 22 20 16; 18 14 12 24; 8 28 30 2]); +%! assert (iqr (x, [3 4]), iqr (x, [4 3])); +%! assert (iqr (x, [1 2 3]), cat (4, 17.5, 35)); +%! assert (iqr (x, [2 3 4]), [29.5 19.5 23 31]'); +%! assert (iqr (x, [1 3 4]), [22 28 22 30.5]); +%! assert (iqr (x, [1 2 4]), cat (3, 11, 22, 33)); +%! assert (iqr (x, [1 2 5]), iqr (x, [1 2])); +%! assert (iqr (x, [5 6]), zeros (size (x))); -%!error <Invalid call> iqr () -%!error iqr (1) -%!error iqr (['A'; 'B']) -%!error iqr (1:10, 3) +## Inf, NaN +%!assert (iqr (Inf), NaN) +%!assert (iqr (-Inf), NaN) +%!assert (iqr (NaN), NaN) +%!assert (iqr (NaN), NaN) +%!assert (iqr ([1 2 Inf], 1), [0 0 NaN]) +%!assert (iqr ([1 2 Inf], 2), Inf) +%!assert (iqr ([1 2 -Inf], 1), [0 0 NaN]) +%!assert (iqr ([1 2 -Inf], 2), Inf) +%!assert (iqr ([1 2 3 NaN], 1), [0 0 0 NaN]) +%!assert (iqr ([1 2 3 NaN], 2), 1.5) +%!assert (iqr ([1 NaN 2 3], 2), 1.5) +%!assert (iqr (NaN (2), 1), [NaN, NaN]) +%!assert (iqr (NaN (2), 2), [NaN; NaN]) +%!assert (iqr (NaN (2), 3), NaN (2)) +%!assert (iqr ([[1 2 5], [2 NaN 6]], "all"), 3.5) + +## input validation +%!error iqr () +%!error iqr (1, 2, 3) +%!error <X .* numeric> iqr (['A'; 'B']) +%!error <DIM .* positive integer> iqr (1, 'A') +%!error <DIM .* positive integer> iqr (1, 0) +%!error <DIM .* positive integer> iqr (1, -2) +%!error <DIM .* positive integer> iqr (1, 1.4) +%!error <DIM .* positive integer> iqr (1, [1 -2]) +%!error <DIM .* positive integer> iqr (1, [1 1.4]) +%!error <DIM .* positive integer> iqr ([1 2 3], NaN) +%!error <DIM .* positive integer> iqr ([1 2 3], [2 NaN]) +%!error <DIM .* positive integer> iqr ([1 2 3], Inf) +%!error <DIM .* positive integer> iqr ([1 2 3], [2 Inf]) +%!error <vector DIM .* non-repeating> iqr ([1 2 3], [1 2 1]) +%!error <DIM .* vector> iqr (1, [1 2; 3 4])