changeset 29772:fdbba73edde2

iqr.m: Overhaul function (bug #59636). * iqr.m: Use "quantile" to calculate interquartile range. Extend dimension input to allow "all" and vector input. Increase performance. Improve Matlab compatibility. Add and update tests. * NEWS: Add note about change.
author Nicholas R. Jankowski <jankowskin@asme.org>
date Fri, 11 Dec 2020 14:45:39 -0500
parents 42dc5cf93f83
children 5f404ae822ee
files NEWS scripts/statistics/iqr.m
diffstat 2 files changed, 216 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Thu Jun 17 18:26:29 2021 +0200
+++ b/NEWS	Fri Dec 11 14:45:39 2020 -0500
@@ -196,6 +196,11 @@
 Matlab code, but for which Octave does not yet implement the
 functionality.  By default, this warning is enabled.
 
+- The function `iqr` now uses Matlab compatible interpolation for
+quantile values.  Dimension input now allows a vector, "all", and
+dimensions greater than `ndims (x)`.  The function compatibly handles
+`Inf` and `NaN` input values.
+
 ### Alphabetical list of new functions added in Octave 7
 
 * `cospi`
--- a/scripts/statistics/iqr.m	Thu Jun 17 18:26:29 2021 +0200
+++ b/scripts/statistics/iqr.m	Fri Dec 11 14:45:39 2020 -0500
@@ -24,80 +24,247 @@
 ########################################################################
 
 ## -*- texinfo -*-
-## @deftypefn  {} {} iqr (@var{x})
-## @deftypefnx {} {} iqr (@var{x}, @var{dim})
-## Return the interquartile range, i.e., the difference between the upper
-## and lower quartile of the input data.
+## @deftypefn {} {@var{Z} =} iqr (@var{x})
+## @deftypefnx {} {@var{Z} =} iqr (@var{x}, @var{dim})
+## @deftypefnx {} {@var{Z} =} iqr (@var{x}, @qcode{"ALL"})
+## Return the interquartile range of @var{x}, defined as the distance between
+## the 25th and 75th percentile values of @var{x} calculated using:
+##    quantile (x, [0.25 0.75])
 ##
-## If @var{x} is a matrix, do the above for first non-singleton dimension of
+## If @var{x} is a vector, @code{iqr (@var{x})} will operate on the data in
 ## @var{x}.
 ##
-## If the optional argument @var{dim} is given, operate along this dimension.
+## If @var{x} is a matrix, @code{iqr (@var{x})} will operate independently on
+## each column in @var{x} returning a row vector @var{Z}.
+##
+## If @var{x} is a n-dimensional array, @code{iqr (@var{x})} will operate
+## independently on the first non-singleton dimension in @var{x}, returning an
+## array @var{Z} the same shape as @var{x} with the non-singleton dimenion
+## reduced to 1.
+##
+## The optional variable @var{dim} can be used to force @code{iqr} to operate
+## over the specified dimension.  @var{dim} can either be a scalar dimension or
+## a vector of non-repeating dimensions over which to operate.  In either case
+## @var{dim} must be positive integers. A vector @var{dim} concatenates all
+## specified diminsions for independent operation by @code{iqr}.
+##
+## Specifying dimension @qcode{"ALL"} will force @code{iqr} to operate
+## on all elements of @var{x}, and is equivalent to @code{iqr (@var{x}(:))}.
+## Similarly, specifying a vector dimension including all non-singleton
+## dimensions of @var{x} is equivalent to @code{iqr (@var{x}, @qcode{"ALL"})}.
+##
+## If @var{x} is a scalar, or only singleton dimensions are specified for
+## @var{dim}, the output will be @code{zeros (size (@var{x}))}.
 ##
 ## As a measure of dispersion, the interquartile range is less affected by
 ## outliers than either @code{range} or @code{std}.
-## @seealso{bounds, mad, range, std}
+##
+## @seealso{bounds, mad, range, std, prctile, quantile}
 ## @end deftypefn
 
-function y = iqr (x, dim)
+## TODO:  When Probability Distribution Objects are implemented, enable
+##        handling for those object types.
 
+function z = iqr (x, dim)
+
+  ## input checks
   if (nargin < 1)
     print_usage ();
+  elseif (nargin < 2)
+    dim = [];
   endif
 
   if (! (isnumeric (x) || islogical (x)))
     error ("iqr: X must be a numeric vector or matrix");
   endif
 
+  vecdim_flag = false;
   nd = ndims (x);
   sz = size (x);
-  nel = numel (x);
-  if (nargin != 2)
-    ## Find the first non-singleton dimension.
-    (dim = find (sz > 1, 1)) || (dim = 1);
+
+  if isempty (dim)
+    ## Find first non-singleton dimension.
+    if (max (sz) == 1)
+      dim = 2;
+    else
+      dim = find ((sz > 1), 1);
+    endif
   else
-    if (!(isscalar (dim) && dim == fix (dim))
-        || !(1 <= dim && dim <= nd))
-      error ("iqr: DIM must be an integer and a valid dimension");
+
+    if (isvector (dim) && isnumeric (dim) && all (dim > 0) && all (rem (dim, 1) == 0))
+
+      if (((num_vecdims = numel (dim)) > 1) ...
+          && all (diff (sort (dim))))
+        ## DIM must be 1D and non repeating.
+
+        ## Detect trivial case of DIM being all dimensions (same as "all").
+        highest_dim = (max (nd, max (dim)));
+        if ((num_vecdims == nd) && (highest_dim == nd))
+          x = x(:);
+          sz = size (x);
+          dim = 1;
+        else
+          ## Move dimensions for operation to the front, keeping the order of
+          ## the remaining dimensions.
+          ## Reshape those into a single dimension.
+          ## Process as normal for a dim1 iqr on X, reshape when done.
+
+          vecdim_flag = true;  ## flag for final reshape
+
+          if (iscolumn (dim))
+            dim = dim.';
+          endif
+
+          ## Permutation vector with DIM at front
+          perm = [1:highest_dim];
+          perm(dim) = [];
+          perm = [dim, perm];
+
+          ## Reshape X to put dims to process at front.
+          x = permute (x, perm);
+          sz_x_new = size (x);
+
+          ## Preserve trailing singletons when dim > ndims (x).
+          sz_x_new = [sz_x_new, ones(1, highest_dim - numel (sz_x_new))];
+
+          newshape = [prod(sz_x_new(1:num_vecdims)), ...
+                      ones(1, (num_vecdims-1)), ...
+                      sz_x_new((num_vecdims+1):end)];
+
+          if (numel (newshape) == 1)
+            newshape = [newshape, 1];
+          endif
+
+          ## Collapse dimensions to be processses into single column.
+          x = reshape (x, newshape);
+
+          ## Operate column-wise.
+          dim = 1;
+        endif
+
+      elseif (! isscalar (dim))
+        error ("iqr: vector DIM must contain non-repeating positive integers");
+      endif
+
+    elseif (strcmp (tolower (dim), "all"))
+      ## "ALL" simplifies to collapsing all elements to single vector
+      x = x(:);
+      dim = 1;
+      sz = size (x);
+
+    else
+      error ("iqr: DIM must be a positive integer scalar, vector, or 'all'");
     endif
+
   endif
 
-  ## This code is a bit heavy, but is needed until empirical_inv
-  ## can take a matrix, rather than just a vector argument.
-  n = sz(dim);
-  sz(dim) = 1;
-  if (isa (x, "single"))
-    y = zeros (sz, "single");
+  if (((dim > nd) || (sz(dim) == 1)) && all (isfinite (x)))
+    ## shortcut easy zeros
+    z = zeros (sz);
+  elseif (iscolumn (x) && (dim == 1))
+    ## detect col vector with quantile/diff dim requirement mismatch
+    z = abs (diff (quantile (x, [0.25, 0.75], 1), [], 2));
   else
-    y = zeros (sz);
+    z = abs (diff (quantile (x, [0.25, 0.75], dim), [], dim));
   endif
-  stride = prod (sz(1:dim-1));
-  for i = 1 : nel / n
-    offset = i;
-    offset2 = 0;
-    while (offset > stride)
-      offset -= stride;
-      offset2 += 1;
-    endwhile
-    offset += offset2 * stride * n;
-    rng = [0 : n-1] * stride + offset;
 
-    y(i) = diff (empirical_inv ([1/4, 3/4], x(rng)));
-  endfor
+  if (vecdim_flag)
+    z = ipermute (z, perm);
+  endif
 
 endfunction
 
+%!assert (iqr (17), 0)
+%!assert (iqr (17, 1), 0)
+%!assert (iqr (17, 4), 0)
+%!assert (iqr (1:3), 1.5)
+%!assert (iqr (1:4), 2)
+%!assert (iqr (1:5), 2.5)
+%!assert (iqr (1:10), 5)
+%!assert (iqr ((1:10).'), 5)
+%!assert (iqr (1:10, 2), 5)
+%!assert (iqr (1:10, 1), zeros(1, 10))
+%!assert (iqr (1:10, 3), zeros(1, 10))
+%!assert (iqr ([1:5; 2:6], "all"), 3)
 
-%!assert (iqr (1:101), 50)
-%!assert (iqr (single (1:101)), single (50))
+%!test
+%! x = reshape (1:6, [1 2 3]);
+%! assert (iqr (x), ones (1, 1, 3));
+%! assert (iqr (x, 1), zeros (1, 2, 3));
+%! assert (iqr (x, 2), ones( 1, 1, 3));
+%! assert (iqr (x, 3), [3 3]);
 
-## FIXME: iqr throws horrible error when running across a dimension that is 1.
+## n-D arrays
+%!test
+%! x = magic (4); x = cat (3,x, 2*x, 3*x); x = cat (4, x, 2*x);
+%! y = cat (3, 8*[1 1 1 1], 16*[1 1 1 1], 24*[1 1 1 1]);
+%! assert (iqr (x), cat (4, y, 2*y));
+%! assert (iqr (x, 1), cat (4, y, 2*y));
+%! y = cat (3, 4*[3 1 1 3].', 8*[3 1 1 3].', 12*[3 1 1 3].');
+%! assert (iqr (x, 2), cat (4, y, 2*y));
+%! y = [24 3 4.5 19.5; 7.5 16.5 15 12; 13.5 10.5 9, 18; 6 21 22.5 1.5];
+%! assert (iqr (x, 3), cat (4, y, 2*y));
+%! y = [16 2 3 13; 5 11 10 8; 9 7 6 12; 4 14 15 1];
+%! assert (iqr (x, 4), cat (3, y, 2*y, 3*y));
+%! assert (iqr (x, 5), zeros (size (x)));
+
+## vector dimensions
+%!assert (iqr (17, [1 8]), 0)
+%!assert (iqr ([[1 2 5]; [2 5 6]], [1 2]), 3)
+%!assert (iqr (cat (3, [1 2 5; 2 5 6], [1 2 5; 2 5 6]), [1 2]), cat(3, 3, 3))
+%!assert (iqr (cat (3, [1 2 5; 2 5 6], [1 2 5; 2 5 6]), [1 2]'), cat(3, 3, 3))
 %!test
-%! x = [1:100]';
-%! assert (iqr (x, 1), 50);
-%! assert (iqr (x', 2), 50);
+%! x = magic (4); x = cat (3, x, 2*x, 3*x); x = cat (4, x, 2*x);
+%! y = cat (3, 8, 16, 24);
+%! assert (iqr (x, [1 2]), cat (4, y, 2*y));
+%! y = [14, 18.5, 17.5 19.5];
+%! assert (iqr (x, [1 3]), cat (4, y, 2*y));
+%! y = [10.5 12.5 11.5 15.0000];
+%! assert (iqr (x, [1 4]), cat (3, y, 2*y, 3*y));
+%! assert (iqr (x, [1 5]), iqr (x, 1));
+%! y = [24 13 12 25.5]';
+%! assert (iqr (x, [2 3]), cat (4, y, 2*y));
+%! y = [17.5, 9, 8, 18.5]';
+%! assert (iqr (x, [2 4]), cat (3, y, 2*y, 3*y));
+%! assert (iqr (x, [3 4]), [32 4 6 26; 10 22 20 16; 18 14 12 24; 8 28 30 2]);
+%! assert (iqr (x, [3 4]), iqr (x, [4 3]));
+%! assert (iqr (x, [1 2 3]), cat (4, 17.5, 35));
+%! assert (iqr (x, [2 3 4]), [29.5 19.5 23 31]');
+%! assert (iqr (x, [1 3 4]), [22 28 22 30.5]);
+%! assert (iqr (x, [1 2 4]), cat (3, 11, 22, 33));
+%! assert (iqr (x, [1 2 5]), iqr (x, [1 2]));
+%! assert (iqr (x, [5 6]), zeros (size (x)));
 
-%!error <Invalid call> iqr ()
-%!error iqr (1)
-%!error iqr (['A'; 'B'])
-%!error iqr (1:10, 3)
+## Inf, NaN
+%!assert (iqr (Inf), NaN)
+%!assert (iqr (-Inf), NaN)
+%!assert (iqr (NaN), NaN)
+%!assert (iqr (NaN), NaN)
+%!assert (iqr ([1 2 Inf], 1), [0 0 NaN])
+%!assert (iqr ([1 2 Inf], 2), Inf)
+%!assert (iqr ([1 2 -Inf], 1), [0 0 NaN])
+%!assert (iqr ([1 2 -Inf], 2), Inf)
+%!assert (iqr ([1 2 3 NaN], 1), [0 0 0 NaN])
+%!assert (iqr ([1 2 3 NaN], 2), 1.5)
+%!assert (iqr ([1 NaN 2 3], 2), 1.5)
+%!assert (iqr (NaN (2), 1), [NaN, NaN])
+%!assert (iqr (NaN (2), 2), [NaN; NaN])
+%!assert (iqr (NaN (2), 3), NaN (2))
+%!assert (iqr ([[1 2 5], [2 NaN 6]], "all"), 3.5)
+
+## input validation
+%!error iqr ()
+%!error iqr (1, 2, 3)
+%!error <X .* numeric> iqr (['A'; 'B'])
+%!error <DIM .* positive integer> iqr (1, 'A')
+%!error <DIM .* positive integer> iqr (1, 0)
+%!error <DIM .* positive integer> iqr (1, -2)
+%!error <DIM .* positive integer> iqr (1, 1.4)
+%!error <DIM .* positive integer> iqr (1, [1 -2])
+%!error <DIM .* positive integer> iqr (1, [1 1.4])
+%!error <DIM .* positive integer> iqr ([1 2 3], NaN)
+%!error <DIM .* positive integer> iqr ([1 2 3], [2 NaN])
+%!error <DIM .* positive integer> iqr ([1 2 3], Inf)
+%!error <DIM .* positive integer> iqr ([1 2 3], [2 Inf])
+%!error <vector DIM .* non-repeating> iqr ([1 2 3], [1 2 1])
+%!error <DIM .* vector> iqr (1, [1 2; 3 4])