changeset 24584:7a18e02a516e

mad.m: New function to calculate mean or median absolute deviation. * scripts/statistics/mad.m: New function. * scripts/statistics/module.mk: Add mad.m to build system. * NEWS: Announce new function. * stats.txi: Add function to manual. * bounds.m, iqr.m, range.m, std.m: Add seealso links to mad.
author Rik <rik@octave.org>
date Wed, 10 Jan 2018 16:09:53 -0800
parents 466e2aab871b
children 8a4aedbb3e5a
files NEWS doc/interpreter/stats.txi scripts/statistics/bounds.m scripts/statistics/iqr.m scripts/statistics/mad.m scripts/statistics/module.mk scripts/statistics/range.m scripts/statistics/std.m
diffstat 8 files changed, 115 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Wed Jan 10 15:41:47 2018 -0500
+++ b/NEWS	Wed Jan 10 16:09:53 2018 -0800
@@ -235,6 +235,7 @@
       integral3
       isgraphics
       isstring
+      mad
       openvar
       quad2d
       repelem
--- a/doc/interpreter/stats.txi	Wed Jan 10 15:41:47 2018 -0500
+++ b/doc/interpreter/stats.txi	Wed Jan 10 16:09:53 2018 -0800
@@ -78,6 +78,8 @@
 
 @DOCSTRING(iqr)
 
+@DOCSTRING(mad)
+
 @DOCSTRING(meansq)
 
 @DOCSTRING(std)
--- a/scripts/statistics/bounds.m	Wed Jan 10 15:41:47 2018 -0500
+++ b/scripts/statistics/bounds.m	Wed Jan 10 16:09:53 2018 -0800
@@ -36,7 +36,7 @@
 ##
 ## The bounds are a quickly computed measure of the dispersion of a data set,
 ## but are less accurate than @code{iqr} if there are outlying data points.
-## @seealso{range, iqr, std}
+## @seealso{range, iqr, mad, std}
 ## @end deftypefn
 
 function [s, l] = bounds (x, dim, nanflag = false)
--- a/scripts/statistics/iqr.m	Wed Jan 10 15:41:47 2018 -0500
+++ b/scripts/statistics/iqr.m	Wed Jan 10 16:09:53 2018 -0800
@@ -29,7 +29,7 @@
 ##
 ## As a measure of dispersion, the interquartile range is less affected by
 ## outliers than either @code{range} or @code{std}.
-## @seealso{bounds, range, std}
+## @seealso{bounds, mad, range, std}
 ## @end deftypefn
 
 ## Author KH <Kurt.Hornik@wu-wien.ac.at>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/statistics/mad.m	Wed Jan 10 16:09:53 2018 -0800
@@ -0,0 +1,107 @@
+## Copyright (C) 2017 Rik Wehbring
+##
+## This file is part of Octave.
+##
+## Octave is free software: you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <https://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn  {} {} mad (@var{x})
+## @deftypefnx {} {} mad (@var{x}, @var{opt})
+## @deftypefnx {} {} mad (@var{x}, @var{opt}, @var{dim})
+## Compute the mean or median absolute deviation of the elements of @var{x}.
+##
+## The mean absolute deviation is defined as
+##
+## @example
+## @var{mad} = mean (abs (@var{x} - mean (@var{x})))
+## @end example
+##
+## The median absolute deviation is defined as
+##
+## @example
+## @var{mad} = median (abs (@var{x} - median (@var{x})))
+## @end example
+##
+## If @var{x} is a matrix, compute @code{mad} for each column and return
+## results in a row vector.  For a multi-dimensional array, the calculation is
+## done over the first non-singleton dimension.
+##
+## The optional argument @var{opt} determines whether mean or median absolute
+## deviation is calculated.  The default is 0, which corresponds to mean
+## absolute deviation; a value of 1 corresponds to median absolute deviation.
+##
+## If the optional argument @var{dim} is given, operate along this dimension.
+##
+## As a measure of dispersion, @code{mad} is less affected by outliers than
+## @code{std}.
+## @seealso{bounds, range, iqr, std, mean, median}
+## @end deftypefn
+
+function retval = mad (x, opt = 0, dim)
+  ## Mean (OPT = 0) or median (OPT = 1) absolute deviation of X along DIM.
+  if (nargin < 1 || nargin > 3)
+    print_usage ();
+  endif
+  ## Only numeric or logical data is accepted; anything else is an error.
+  if (! (isnumeric (x) || islogical (x)))
+    error ("mad: X must be a numeric vector or matrix");
+  endif
+  ## An empty OPT selects the default (0) so callers can write mad (x, [], dim).
+  if (isempty (opt))
+    opt = 0;
+  elseif (! isscalar (opt) || (opt != 0 && opt != 1))
+    error ("mad: OPT must be 0 or 1");
+  endif
+  ## Resolve DIM: use the caller's value if given, otherwise derive it from X.
+  sz = size (x);
+  if (nargin < 3)
+    ## Find the first non-singleton dimension; fall back to 1 if there is none.
+    (dim = find (sz > 1, 1)) || (dim = 1);
+  else
+    if (! (isscalar (dim) && dim == fix (dim) && dim > 0))
+      error ("mad: DIM must be an integer and a valid dimension");
+    endif
+  endif
+  ## The same function computes both the center and the final reduction.
+  if (opt == 0)
+    fcn = @mean;
+  else
+    fcn = @median;
+  endif
+  ## fcn (x, dim) collapses DIM; the subtraction broadcasts it back against x.
+  retval = fcn (abs (x - fcn (x, dim)), dim);
+
+endfunction
+
+
+%!assert (mad ([0 0 1 2 100]), 31.76)
+%!assert (mad (single ([0 0 1 2 100])), single (31.76))
+%!assert (mad ([0 0 1 2 100]'), 31.76)
+%!assert (mad ([0 0 1 2 100], 1), 1)
+%!assert (mad (single ([0 0 1 2 100]), 1), single (1))
+%!assert (mad ([0 0 1 2 100]', 1), 1)
+%!assert (mad (magic (4)), [4, 4, 4, 4])
+%!assert (mad (magic (4), [], 2), [6; 2; 2; 6])
+%!assert (mad (magic (4), 1), [2.5, 3.5, 3.5, 2.5])
+%!assert (mad (magic (4), 1, 2), [5.5; 1.5; 1.5; 5.5])
+
+## Test input validation
+%!error mad ()
+%!error mad (1, 2, 3, 4)
+%!error <X must be a numeric> mad (['A'; 'B'])
+%!error <OPT must be 0 or 1> mad (1, 2)
+%!error <DIM must be an integer> mad (1, [], ones (2,2))
+%!error <DIM must be an integer> mad (1, [], 1.5)
+%!error <DIM must be .* a valid dimension> mad (1, [], 0)
--- a/scripts/statistics/module.mk	Wed Jan 10 15:41:47 2018 -0500
+++ b/scripts/statistics/module.mk	Wed Jan 10 16:09:53 2018 -0800
@@ -18,6 +18,7 @@
   %reldir%/iqr.m \
   %reldir%/kendall.m \
   %reldir%/kurtosis.m \
+  %reldir%/mad.m \
   %reldir%/mean.m \
   %reldir%/meansq.m \
   %reldir%/median.m \
--- a/scripts/statistics/range.m	Wed Jan 10 15:41:47 2018 -0500
+++ b/scripts/statistics/range.m	Wed Jan 10 16:09:53 2018 -0800
@@ -31,7 +31,7 @@
 ##
 ## The range is a quickly computed measure of the dispersion of a data set, but
 ## is less accurate than @code{iqr} if there are outlying data points.
-## @seealso{bounds, iqr, std}
+## @seealso{bounds, iqr, mad, std}
 ## @end deftypefn
 
 ## Author: KH <Kurt.Hornik@wu-wien.ac.at>
--- a/scripts/statistics/std.m	Wed Jan 10 15:41:47 2018 -0500
+++ b/scripts/statistics/std.m	Wed Jan 10 16:09:53 2018 -0800
@@ -58,7 +58,7 @@
 ## @end table
 ##
 ## If the optional argument @var{dim} is given, operate along this dimension.
-## @seealso{var, bounds, range, iqr, mean, median}
+## @seealso{var, bounds, mad, range, iqr, mean, median}
 ## @end deftypefn
 
 ## Author: jwe