changeset 24574:b7047fe47123

bounds.m: New function to find smallest and largest element of dataset. * scripts/statistics/bounds.m: New function. * scripts/statistics/module.mk: Add bounds.m to build system. * NEWS: Announce new function. * stats.txi: Add function to manual. * iqr.m, range.m, std.m: Add seealso links to bounds.
author Rik <rik@octave.org>
date Tue, 09 Jan 2018 20:42:25 -0800
parents 28a4037d10ab
children e6b22e378389
files NEWS doc/interpreter/stats.txi scripts/statistics/bounds.m scripts/statistics/iqr.m scripts/statistics/module.mk scripts/statistics/range.m scripts/statistics/std.m
diffstat 7 files changed, 121 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Tue Jan 09 17:40:10 2018 -0500
+++ b/NEWS	Tue Jan 09 20:42:25 2018 -0800
@@ -214,6 +214,7 @@
 
  ** Other new functions added in 4.4:
 
+      bounds
       camlookat
       camorbit
       campos
--- a/doc/interpreter/stats.txi	Tue Jan 09 17:40:10 2018 -0500
+++ b/doc/interpreter/stats.txi	Tue Jan 09 20:42:25 2018 -0800
@@ -72,6 +72,8 @@
 to measure the dispersion of the data.  Octave provides several functions for
 measuring dispersion.
 
+@DOCSTRING(bounds)
+
 @DOCSTRING(range)
 
 @DOCSTRING(iqr)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/statistics/bounds.m	Tue Jan 09 20:42:25 2018 -0800
@@ -0,0 +1,114 @@
+## Copyright (C) 2018 Rik Wehbring
+##
+## This file is part of Octave.
+##
+## Octave is free software: you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <https://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn  {} {[@var{s}, @var{l}] =} bounds (@var{x})
+## @deftypefnx {} {[@var{s}, @var{l}] =} bounds (@var{x}, @var{dim})
+## @deftypefnx {} {[@var{s}, @var{l}] =} bounds (@dots{}, "nanflag")
+## Return the smallest and largest values of the input data @var{x}.
+##
+## If @var{x} is a vector, the bounds are calculated over the elements of
+## @var{x}.  If @var{x} is a matrix, the bounds are calculated for each column.
+## For a multi-dimensional array, the bounds are calculated over the first
+## non-singleton dimension.
+##
+## If the optional argument @var{dim} is given, operate along this dimension.
+##
+## The optional argument @qcode{"nanflag"} defaults to @qcode{"omitnan"} which
+## does not include NaN values in the result.  If the argument
+## @qcode{"includenan"} is given, and there is a NaN present, then the result
+## for both smallest (@var{s}) and largest (@var{l}) elements will be NaN.
+##
+## The bounds are a quickly computed measure of the dispersion of a data set,
+## but are less accurate than @code{iqr} if there are outlying data points.
+## @seealso{range, iqr, std}
+## @end deftypefn
+
+function [s, l] = bounds (x, dim, nanflag = false)
+
+  if (nargin < 1 || nargin > 3)
+    print_usage ();
+  endif
+
+  if (! (isnumeric (x) || islogical (x)))
+    error ("bounds: X must be a numeric vector or matrix");
+  endif
+
+  ## A char second argument is the nanflag; DIM is then chosen automatically,
+  ## as it is when DIM is omitted or given as [] ahead of a nanflag.
+  if (nargin == 2 && ischar (dim))
+    nanflag = dim;
+    need_dim = true;
+  else
+    need_dim = (nargin == 1 || (nargin == 3 && isempty (dim)));
+  endif
+
+  if (need_dim)
+    ## First non-singleton dimension, or 1 when find() returns empty.
+    (dim = find (size (x) > 1, 1)) || (dim = 1);
+  elseif (! (isscalar (dim) && dim == fix (dim) && dim > 0))
+    error ("bounds: DIM must be an integer and a valid dimension");
+  endif
+
+  ## Reject unknown flags rather than silently treating them as "omitnan".
+  ## The default FALSE (no flag supplied) also means "omitnan".
+  if (ischar (nanflag) && any (strcmpi (nanflag, {"omitnan", "includenan"})))
+    nanflag = strcmpi (nanflag, "includenan");
+  elseif (! isequal (nanflag, false))
+    error ('bounds: NANFLAG must be "omitnan" or "includenan"');
+  endif
+
+  s = min (x, [], dim);
+  l = max (x, [], dim);
+  if (nanflag)
+    ## min and max skip NaN values, so put NaN back for affected slices.
+    nanidx = any (isnan (x), dim);
+    s(nanidx) = NaN;
+    l(nanidx) = NaN;
+  endif
+
+endfunction
+
+
+%!assert (nthargout (1:2, @bounds, 1:10), {1, 10})
+%!assert (nthargout (1:2, @bounds, [10:-1:1]'), {1, 10})
+%!assert (nthargout (1:2, @bounds, single (1:10)), {single(1), single(10)})
+%!assert (bounds (magic (3)), [3, 1, 2])
+%!assert (bounds (magic (3), 2), [1; 3; 2])
+%!test
+%! x = magic (3);
+%! x(2,3) = NaN;
+%! assert (bounds (x), [3, 1, 2]);
+%! assert (bounds (x, "omitnan"), [3, 1, 2]);
+%! assert (bounds (x, "includenan"), [3, 1, NaN]);
+%! assert (bounds (x, 2), [1; 3; 2]);
+%! assert (bounds (x, 2, "omitnan"), [1; 3; 2]);
+%! assert (bounds (x, 2, "includenan"), [1; NaN; 2]);
+%!test
+%! x = reshape (1:27, [3, 3, 3]);
+%! [s,l] = bounds (x, 3);
+%! assert (s, x(:,:,1));
+%! assert (l, x(:,:,3));
+
+## Test input validation
+%!error bounds ()
+%!error bounds (1, 2, 3, 4)
+%!error <X must be a numeric> bounds (['A'; 'B'])
+%!error <DIM must be an integer> bounds (1, ones (2,2))
+%!error <DIM must be an integer> bounds (1, 1.5)
+%!error <DIM must be .* a valid dimension> bounds (1, 0)
--- a/scripts/statistics/iqr.m	Tue Jan 09 17:40:10 2018 -0500
+++ b/scripts/statistics/iqr.m	Tue Jan 09 20:42:25 2018 -0800
@@ -29,7 +29,7 @@
 ##
 ## As a measure of dispersion, the interquartile range is less affected by
 ## outliers than either @code{range} or @code{std}.
-## @seealso{range, std}
+## @seealso{bounds, range, std}
 ## @end deftypefn
 
 ## Author KH <Kurt.Hornik@wu-wien.ac.at>
--- a/scripts/statistics/module.mk	Tue Jan 09 17:40:10 2018 -0500
+++ b/scripts/statistics/module.mk	Tue Jan 09 20:42:25 2018 -0800
@@ -1,6 +1,7 @@
 FCN_FILE_DIRS += scripts/statistics
 
 %canon_reldir%_FCN_FILES = \
+  %reldir%/bounds.m \
   %reldir%/center.m \
   %reldir%/corrcoef.m \
   %reldir%/corr.m \
--- a/scripts/statistics/range.m	Tue Jan 09 17:40:10 2018 -0500
+++ b/scripts/statistics/range.m	Tue Jan 09 20:42:25 2018 -0800
@@ -31,7 +31,7 @@
 ##
 ## The range is a quickly computed measure of the dispersion of a data set, but
 ## is less accurate than @code{iqr} if there are outlying data points.
-## @seealso{iqr, std}
+## @seealso{bounds, iqr, std}
 ## @end deftypefn
 
 ## Author: KH <Kurt.Hornik@wu-wien.ac.at>
--- a/scripts/statistics/std.m	Tue Jan 09 17:40:10 2018 -0500
+++ b/scripts/statistics/std.m	Tue Jan 09 20:42:25 2018 -0800
@@ -58,7 +58,7 @@
 ## @end table
 ##
 ## If the optional argument @var{dim} is given, operate along this dimension.
-## @seealso{var, range, iqr, mean, median}
+## @seealso{var, bounds, range, iqr, mean, median}
 ## @end deftypefn
 
 ## Author: jwe