# HG changeset patch # User Ben Abbott # Date 1206503869 14400 # Node ID 0220da981c2a4f9fc5171ea037c1bec4efc5271b # Parent 9a4541c622b51d49c51685ba72c4b68a46951d25 Modified statistics to calculate consistent median. diff -r 9a4541c622b5 -r 0220da981c2a doc/interpreter/stats.txi --- a/doc/interpreter/stats.txi Tue Mar 25 23:06:45 2008 -0400 +++ b/doc/interpreter/stats.txi Tue Mar 25 23:57:49 2008 -0400 @@ -61,6 +61,10 @@ @DOCSTRING(median) +@DOCSTRING(quantile) + +@DOCSTRING(prctile) + @DOCSTRING(meansq) @DOCSTRING(std) diff -r 9a4541c622b5 -r 0220da981c2a scripts/ChangeLog --- a/scripts/ChangeLog Tue Mar 25 23:06:45 2008 -0400 +++ b/scripts/ChangeLog Tue Mar 25 23:57:49 2008 -0400 @@ -1,3 +1,11 @@ +2008-03-20 Ben Abbott + + * statistics/base/statistics.m: Calculate median and quantiles in + a manner consistent with method #7 used by GNU R. + * statistics/base/__quantile__.m, statistics/base/quantile.m, + statistics/base/prctile.m: New functions. + * statistics/base/Makefile.in (SOURCES): Add them to the list. + 2008-03-25 Soren Hauberg * polynomial/convn.m: New function. diff -r 9a4541c622b5 -r 0220da981c2a scripts/statistics/base/Makefile.in --- a/scripts/statistics/base/Makefile.in Tue Mar 25 23:06:45 2008 -0400 +++ b/scripts/statistics/base/Makefile.in Tue Mar 25 23:57:49 2008 -0400 @@ -32,11 +32,11 @@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_DATA = @INSTALL_DATA@ -SOURCES = center.m cloglog.m cor.m corrcoef.m cov.m cut.m gls.m \ - iqr.m kendall.m kurtosis.m logit.m mahalanobis.m mean.m meansq.m \ - median.m mode.m moment.m ols.m ppplot.m probit.m qqplot.m range.m \ - ranks.m run_count.m skewness.m spearman.m statistics.m std.m \ - studentize.m table.m values.m var.m +SOURCES = __quantile__.m center.m cloglog.m cor.m corrcoef.m cov.m \ + cut.m gls.m iqr.m kendall.m kurtosis.m logit.m mahalanobis.m mean.m \ + meansq.m median.m mode.m moment.m ols.m ppplot.m prctile.m probit.m \ + qqplot.m quantile.m range.m ranks.m run_count.m skewness.m spearman.m \ + statistics.m std.m studentize.m table.m values.m var.m DISTFILES = $(addprefix $(srcdir)/, Makefile.in $(SOURCES)) diff -r 9a4541c622b5 -r 0220da981c2a scripts/statistics/base/__quantile__.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/statistics/base/__quantile__.m Tue Mar 25 23:57:49 2008 -0400 @@ -0,0 +1,260 @@ +## Copyright (C) 2008 Ben Abbott, Jaroslav Hajek +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or (at +## your option) any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, see +## . + +## -*- texinfo -*- +## @deftypefn {Function File} {@var{q} =} __quantile__ (@var{x}, @var{p}) +## @deftypefnx {Function File} {@var{q} =} __quantile__ (@var{x}, @var{p}, @var{method}) +## For the cumulative probability values in @var{p}, compute the +## quantiles, @var{q} (the inverse of the cdf), for the sample, @var{x}. +## +## The optional input, @var{method}, refers to nine methods available in R +## (http://www.r-project.org/). The default is @var{method} = 7. For more +## detail, see `help quantile'. +## @seealso{prctile, quantile, statistics} +## @end deftypefn + +## Author: Ben Abbott +## Vectorized version: Jaroslav Hajek +## Description: Quantile function of a empirical samples + +function inv = __quantile__ (x, p, method = 5) + + if (nargin < 2 || nargin > 3) + print_usage (); + endif + + if (! ismatrix (x)) + error ("quantile: x must be a matrix"); + endif + + ## Save length and set shape of quantiles. + n = numel (p); + p = p(:); + + ## Save length and set shape of samples. + ## TODO: does sort guarantee that NaN's come at the end? + x = sort (x); + m = sum (! isnan (x)); + mx = size (x, 1); + nx = size (x, 2); + + ## Initialize output values. + inv = Inf*(-(p < 0) + (p > 1)); + inv = repmat (inv, 1, nx); + + ## Do the work. + if (any(k = find((p >= 0) & (p <= 1)))) + n = length (k); + p = p (k); + ## Special case. + if (mx == 1) + inv(k,:) = repmat (x, n, 1); + return + endif + + ## The column-distribution indices. + pcd = kron (ones (n, 1), mx*(0:nx-1)); + mm = kron (ones (n, 1), m); + switch method + case {1, 2, 3} + switch method + case 1 + p = max (ceil (kron (p, m)), 1); + inv(k,:) = x(p + pcd); + + case 2 + p = kron (p, m); + p_lr = max (ceil (p), 1); + p_rl = min (floor (p + 1), mm); + inv(k,:) = (x(p_lr + pcd) + x(p_rl + pcd))/2; + + case 3 + ## Used by SAS, method PCTLDEF=2. + ## http://support.sas.com/onlinedoc/913/getDoc/en/statug.hlp/stdize_sect14.htm + t = max (kron (p, m), 1); + t = roundb (t); + inv(k,:) = x(t + pcd); + endswitch + + otherwise + switch method + case 4 + p = kron (p, m); + + case 5 + ## Used by Matlab. + p = kron (p, m) + 0.5; + + case 6 + ## Used by Minitab and SPSS. + p = kron (p, m+1); + + case 7 + ## Used by S and R. + p = kron (p, m-1) + 1; + + case 8 + ## Median unbiased . + p = kron (p, m+1/3) + 1/3; + + case 9 + ## Approximately unbiased respecting order statistics. + p = kron (p, m+0.25) + 0.375; + + otherwise + error ("quantile: Unknown method, '%d'",method) + endswitch + + ## Duplicate single values. + imm1 = mm == 1; + x(2,imm1) = x(1,imm1); + + ## Interval indices. + pi = max (min (floor (p), mm-1), 1); + pr = max (min (p - pi, 1), 0); + pi += pcd; + inv(k,:) = (1-pr) .* x(pi) + pr .* x(pi+1); + endswitch + endif + +endfunction + +%!test +%! p = 0.5; +%! x = sort (rand (11)); +%! q = __quantile__ (x, p); +%! assert (q, x(6,:)) + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [1; 2; 3; 4]; +%! a = [1.0000 1.0000 2.0000 3.0000 4.0000 +%! 1.0000 1.5000 2.5000 3.5000 4.0000 +%! 1.0000 1.0000 2.0000 3.0000 4.0000 +%! 1.0000 1.0000 2.0000 3.0000 4.0000 +%! 1.0000 1.5000 2.5000 3.5000 4.0000 +%! 1.0000 1.2500 2.5000 3.7500 4.0000 +%! 1.0000 1.7500 2.5000 3.2500 4.0000 +%! 1.0000 1.4167 2.5000 3.5833 4.0000 +%! 1.0000 1.4375 2.5000 3.5625 4.0000]; +%! for m = (1:9) +%! q = __quantile__ (x, p, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [1; 2; 3; 4; 5]; +%! a = [1.0000 2.0000 3.0000 4.0000 5.0000 +%! 1.0000 2.0000 3.0000 4.0000 5.0000 +%! 1.0000 1.0000 2.0000 4.0000 5.0000 +%! 1.0000 1.2500 2.5000 3.7500 5.0000 +%! 1.0000 1.7500 3.0000 4.2500 5.0000 +%! 1.0000 1.5000 3.0000 4.5000 5.0000 +%! 1.0000 2.0000 3.0000 4.0000 5.0000 +%! 1.0000 1.6667 3.0000 4.3333 5.0000 +%! 1.0000 1.6875 3.0000 4.3125 5.0000]; +%! for m = (1:9) +%! q = __quantile__ (x, p, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [1; 2; 5; 9]; +%! a = [1.0000 1.0000 2.0000 5.0000 9.0000 +%! 1.0000 1.5000 3.5000 7.0000 9.0000 +%! 1.0000 1.0000 2.0000 5.0000 9.0000 +%! 1.0000 1.0000 2.0000 5.0000 9.0000 +%! 1.0000 1.5000 3.5000 7.0000 9.0000 +%! 1.0000 1.2500 3.5000 8.0000 9.0000 +%! 1.0000 1.7500 3.5000 6.0000 9.0000 +%! 1.0000 1.4167 3.5000 7.3333 9.0000 +%! 1.0000 1.4375 3.5000 7.2500 9.0000]; +%! for m = (1:9) +%! q = __quantile__ (x, p, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [1; 2; 5; 9; 11]; +%! a = [1.0000 2.0000 5.0000 9.0000 11.0000 +%! 1.0000 2.0000 5.0000 9.0000 11.0000 +%! 1.0000 1.0000 2.0000 9.0000 11.0000 +%! 1.0000 1.2500 3.5000 8.0000 11.0000 +%! 1.0000 1.7500 5.0000 9.5000 11.0000 +%! 1.0000 1.5000 5.0000 10.0000 11.0000 +%! 1.0000 2.0000 5.0000 9.0000 11.0000 +%! 1.0000 1.6667 5.0000 9.6667 11.0000 +%! 1.0000 1.6875 5.0000 9.6250 11.0000]; +%! for m = (1:9) +%! q = __quantile__ (x, p, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [16; 11; 15; 12; 15; 8; 11; 12; 6; 10]; +%! a = [6.0000 10.0000 11.0000 15.0000 16.0000 +%! 6.0000 10.0000 11.5000 15.0000 16.0000 +%! 6.0000 8.0000 11.0000 15.0000 16.0000 +%! 6.0000 9.0000 11.0000 13.5000 16.0000 +%! 6.0000 10.0000 11.5000 15.0000 16.0000 +%! 6.0000 9.5000 11.5000 15.0000 16.0000 +%! 6.0000 10.2500 11.5000 14.2500 16.0000 +%! 6.0000 9.8333 11.5000 15.0000 16.0000 +%! 6.0000 9.8750 11.5000 15.0000 16.0000]; +%! for m = (1:9) +%! q = __quantile__ (x, p, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [-0.58851; 0.40048; 0.49527; -2.551500; -0.52057; ... +%! -0.17841; 0.057322; -0.62523; 0.042906; 0.12337]; +%! a = [-2.551474 -0.588505 -0.178409 0.123366 0.495271 +%! -2.551474 -0.588505 -0.067751 0.123366 0.495271 +%! -2.551474 -0.625231 -0.178409 0.123366 0.495271 +%! -2.551474 -0.606868 -0.178409 0.090344 0.495271 +%! -2.551474 -0.588505 -0.067751 0.123366 0.495271 +%! -2.551474 -0.597687 -0.067751 0.192645 0.495271 +%! -2.551474 -0.571522 -0.067751 0.106855 0.495271 +%! -2.551474 -0.591566 -0.067751 0.146459 0.495271 +%! -2.551474 -0.590801 -0.067751 0.140686 0.495271]; +%! for m = (1:9) +%! q = __quantile__ (x, p, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = 0.5; +%! x = [0.112600, 0.114800, 0.052100, 0.236400, 0.139300 +%! 0.171800, 0.727300, 0.204100, 0.453100, 0.158500 +%! 0.279500, 0.797800, 0.329600, 0.556700, 0.730700 +%! 0.428800, 0.875300, 0.647700, 0.628700, 0.816500 +%! 0.933100, 0.931200, 0.963500, 0.779600, 0.846100]; +%! tol = 0.00001; +%! x(5,5) = NaN; +%! assert (__quantile__ (x, p), [0.27950, 0.79780, 0.32960, 0.55670, 0.44460], tol); +%! x(1,1) = NaN; +%! assert (__quantile__ (x, p), [0.35415, 0.79780, 0.32960, 0.55670, 0.44460], tol); +%! x(3,3) = NaN; +%! assert (__quantile__ (x, p), [0.35415, 0.79780, 0.42590, 0.55670, 0.44460], tol); + diff -r 9a4541c622b5 -r 0220da981c2a scripts/statistics/base/prctile.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/statistics/base/prctile.m Tue Mar 25 23:57:49 2008 -0400 @@ -0,0 +1,156 @@ +## Copyright (C) 2008 Ben Abbott +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or (at +## your option) any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, see +## . + +## -*- texinfo -*- +## @deftypefn {Function File} {@var{y} =} prctile (@var{x}, @var{p}) +## @deftypefnx {Function File} {@var{q} =} prctile (@var{x}, @var{p}, @var{dim}) +## For a sample @var{x}, compute the the quantiles, @var{y}, corresponding +## to the cumulative probability values, P, in percent. All non-numeric +## values (NaNs) of X are ignored. +## +## If @var{x} is a matrix, compute the the percentiles for each column and +## return them in a matrix, such that the i-th row of @var{y} contains the +## @var{p}(i)th percentiles of each column of @var{x}. +## +## The optional argument @var{dim} determines the dimension along which +## the percentiles are calculated. If @var{dim} is omitted, and @var{x} is +## a vector or matrix, it defaults to 1 (column wise quantiles). In the +## instance that @var{x} is a N-d array, @var{dim} defaults to the first +## dimension whose size greater than unity. +## +## @end deftypefn + +## Author: Ben Abbott +## Description: Matlab style prctile function. + +function q = prctile (x, p, dim) + + if (nargin < 1 || nargin > 3) + print_usage (); + endif + + if (nargin < 2) + p = 100*[0.00 0.25, 0.50, 0.75, 1.00]; + endif + + if (nargin < 3) + if (ndims (x) == 2) + ## If a matrix or vector, use the 1st dimension. + dim = 1; + else + ## If an N-d array, use the firt dimension with a length > 1. + dim = find (size(v) != 1); + if (isempty (dim)) + dim = 1; + endif + endif + endif + + ## Convert from percent. + p = 0.01 * p; + + ## The 5th method is compatible with Matlab. + method = 5; + + ## Call the quantile function + q = quantile (x, p, dim, method); + +endfunction + +%!test +%! pct = 50; +%! q = prctile (1:4, pct, 1); +%! qa = [1, 2, 3, 4]; +%! assert (q, qa); +%! q = prctile (1:4, pct, 2); +%! qa = 2.5000; +%! assert (q, qa); + +%!test +%! pct = 50; +%! x = [0.1126, 0.1148, 0.0521, 0.2364, 0.1393 +%! 0.1718, 0.7273, 0.2041, 0.4531, 0.1585 +%! 0.2795, 0.7978, 0.3296, 0.5567, 0.7307 +%! 0.4288, 0.8753, 0.6477, 0.6287, 0.8165 +%! 0.9331, 0.9312, 0.9635, 0.7796, 0.8461]; +%! tol = 0.0001; +%! q = prctile (x, pct, 1); +%! qa = [0.2795, 0.7978, 0.3296, 0.5567, 0.7307]; +%! assert (q, qa, tol); +%! q = prctile (x, pct, 2); +%! qa = [0.1148; 0.2041; 0.5567; 0.6477; 0.9312]; +%! assert (q, qa, tol); + +%!test +%! pct = 50; +%! tol = 0.0001; +%! x = [0.1126, 0.1148, 0.0521, 0.2364, 0.1393 +%! 0.1718, 0.7273, 0.2041, 0.4531, 0.1585 +%! 0.2795, 0.7978, 0.3296, 0.5567, 0.7307 +%! 0.4288, 0.8753, 0.6477, 0.6287, 0.8165 +%! 0.9331, 0.9312, 0.9635, 0.7796, 0.8461]; +%! x(5,5) = Inf; +%! q = prctile (x, pct, 1); +%! qa = [0.2795, 0.7978, 0.3296, 0.5567, 0.7307]; +%! assert (q, qa, tol); +%! x(5,5) = -Inf; +%! q = prctile (x, pct, 1); +%! qa = [0.2795, 0.7978, 0.3296, 0.5567, 0.1585]; +%! assert (q, qa, tol); +%! x(1,1) = Inf; +%! q = prctile (x, pct, 1); +%! qa = [0.4288, 0.7978, 0.3296, 0.5567, 0.1585]; +%! assert (q, qa, tol); + +%!test +%! pct = 50; +%! tol = 0.0001; +%! x = [0.1126, 0.1148, 0.0521, 0.2364, 0.1393 +%! 0.1718, 0.7273, 0.2041, 0.4531, 0.1585 +%! 0.2795, 0.7978, 0.3296, 0.5567, 0.7307 +%! 0.4288, 0.8753, 0.6477, 0.6287, 0.8165 +%! 0.9331, 0.9312, 0.9635, 0.7796, 0.8461]; +%! x(3,3) = Inf; +%! q = prctile (x, pct, 1); +%! qa = [0.2795, 0.7978, 0.6477, 0.5567, 0.7307]; +%! assert (q, qa, tol); +%! q = prctile (x, pct, 2); +%! qa = [0.1148; 0.2041; 0.7307; 0.6477; 0.9312]; +%! assert (q, qa, tol); + +%!test +%! pct = 50; +%! tol = 0.0001; +%! x = [0.1126, 0.1148, 0.0521, 0.2364, 0.1393 +%! 0.1718, 0.7273, 0.2041, 0.4531, 0.1585 +%! 0.2795, 0.7978, 0.3296, 0.5567, 0.7307 +%! 0.4288, 0.8753, 0.6477, 0.6287, 0.8165 +%! 0.9331, 0.9312, 0.9635, 0.7796, 0.8461]; +%! x(5,5) = NaN; +%! q = prctile (x, pct, 2); +%! qa = [0.1148; 0.2041; 0.5567; 0.6477; 0.9322]; +%! assert (q, qa, tol); +%! x(1,1) = NaN; +%! q = prctile (x, pct, 2); +%! qa = [0.1270; 0.2041; 0.5567; 0.6477; 0.9322]; +%! assert (q, qa, tol); +%! x(3,3) = NaN; +%! q = prctile (x, pct, 2); +%! qa = [0.1270; 0.2041; 0.6437; 0.6477; 0.9322]; +%! assert (q, qa, tol); + diff -r 9a4541c622b5 -r 0220da981c2a scripts/statistics/base/quantile.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/statistics/base/quantile.m Tue Mar 25 23:57:49 2008 -0400 @@ -0,0 +1,267 @@ +## Copyright (C) 2008 Ben Abbott +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or (at +## your option) any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, see +## . + +## -*- texinfo -*- +## @deftypefn {Function File} {@var{q} =} quantile (@var{x}, @var{p}) +## @deftypefnx {Function File} {@var{q} =} quantile (@var{x}, @var{p}, @var{dim}) +## @deftypefnx {Function File} {@var{q} =} quantile (@var{x}, @var{p}, @var{dim}, @var{method} ) +## For a sample, @var{x}, calculate the quantiles, @var{q}, corresponding to +## the cumulative probability values in @var{p}. All non-numeric values (NaNs) of +## @var{x} are ignored. +## +## If @var{x} is a matrix, compute the quantiles for each column and +## return them in a matrix, such that the i-th row of @var{q} contains +## the @var{p}(i)th quantiles of each column of @var{x}. +## +## The optional argument @var{dim} determines the dimension along which +## the percentiles are calculated. If @var{dim} is omitted, and @var{x} is +## a vector or matrix, it defaults to 1 (column wise quantiles). In the +## instance that @var{x} is a N-d array, @var{dim} defaults to the first +## dimension whose size greater than unity. +## +## The methods available to calculate sample quantiles are the nine methods +## used by R (http://www.r-project.org/). The default value is METHOD = 5. +## +## Discontinuous sample quantile methods 1, 2, and 3 +## +## @enumerate 1 +## @item Method 1: Inverse of empirical distribution function. +## @item Method 2: Similar to method 1 but with averaging at discontinuities. +## @item Method 3: SAS definition: nearest even order statistic. +## @end enumerate +## +## Continuous sample quantile methods 4 through 9, where p(k) is the linear +## interpolation function respecting each methods' representative cdf. +## +## @enumerate 4 +## @item Method 4: p(k) = k / n. That is, linear interpolation of the empirical cdf. +## @item Method 5: p(k) = (k - 0.5) / n. That is a piecewise linear function where +## the knots are the values midway through the steps of the empirical cdf. +## @item Method 6: p(k) = k / (n + 1). +## @item Method 7: p(k) = (k - 1) / (n - 1). +## @item Method 8: p(k) = (k - 1/3) / (n + 1/3). The resulting quantile estimates +## are approximately median-unbiased regardless of the distribution of @var{x}. +## @item Method 9: p(k) = (k - 3/8) / (n + 1/4). The resulting quantile estimates +## are approximately unbiased for the expected order statistics if @var{x} is +## normally distributed. +## @end enumerate +## +## Hyndman and Fan (1996) recommend method 8. Maxima, S, and R +## (versions prior to 2.0.0) use 7 as their default. Minitab and SPSS +## use method 6. Matlab uses method 5. +## +## References: +## +## @itemize @bullet +## @item Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) The New +## S Language. Wadsworth & Brooks/Cole. +## +## @item Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in +## statistical packages, American Statistician, 50, 361-365. +## +## @item R: A Language and Environment for Statistical Computing; +## @url{http://cran.r-project.org/doc/manuals/fullrefman.pdf}. +## @end itemize +## @end deftypefn + +## Author: Ben Abbott +## Description: Matlab style quantile function of a discrete/continuous distribution + +function q = quantile (x, p, dim, method) + + if (nargin < 1 || nargin > 4) + print_usage (); + endif + + if (nargin < 2) + p = [0.00 0.25, 0.50, 0.75, 1.00]; + endif + + if (nargin < 3) + dim = 1; + endif + + if (nargin < 4) + method = 5; + endif + + if (dim > ndims(x)) + error ("quantile: invalid dimension.") + endif + + # Set the permutation vector. + perm = 1:ndims(x); + perm(1) = dim; + perm(dim) = 1; + + # Permute dim to the 1st index. + x = permute (x, perm); + + # Save the size of the permuted x N-d array. + sx = size (x); + + # Reshape to a 2-d array. + x = reshape (x, [sx(1), prod(sx(2:end))]); + + # Calculate the quantiles. + q = __quantile__ (x, p, method); + + # Return the shape to the original N-d array. + q = reshape (q, [numel(p), sx(2:end)]); + + # Permute the 1st index back to dim. + q = ipermute (q, perm); + +endfunction + +%!test +%! p = 0.5; +%! x = sort (rand (11)); +%! q = quantile (x, p); +%! assert (q, x(6,:)) +%! x = x.'; +%! q = quantile (x, p, 2); +%! assert (q, x(:,6)); + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [1; 2; 3; 4]; +%! a = [1.0000 1.0000 2.0000 3.0000 4.0000 +%! 1.0000 1.5000 2.5000 3.5000 4.0000 +%! 1.0000 1.0000 2.0000 3.0000 4.0000 +%! 1.0000 1.0000 2.0000 3.0000 4.0000 +%! 1.0000 1.5000 2.5000 3.5000 4.0000 +%! 1.0000 1.2500 2.5000 3.7500 4.0000 +%! 1.0000 1.7500 2.5000 3.2500 4.0000 +%! 1.0000 1.4167 2.5000 3.5833 4.0000 +%! 1.0000 1.4375 2.5000 3.5625 4.0000]; +%! for m = (1:9) +%! q = quantile (x, p, 1, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [1; 2; 3; 4; 5]; +%! a = [1.0000 2.0000 3.0000 4.0000 5.0000 +%! 1.0000 2.0000 3.0000 4.0000 5.0000 +%! 1.0000 1.0000 2.0000 4.0000 5.0000 +%! 1.0000 1.2500 2.5000 3.7500 5.0000 +%! 1.0000 1.7500 3.0000 4.2500 5.0000 +%! 1.0000 1.5000 3.0000 4.5000 5.0000 +%! 1.0000 2.0000 3.0000 4.0000 5.0000 +%! 1.0000 1.6667 3.0000 4.3333 5.0000 +%! 1.0000 1.6875 3.0000 4.3125 5.0000]; +%! for m = (1:9) +%! q = quantile (x, p, 1, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [1; 2; 5; 9]; +%! a = [1.0000 1.0000 2.0000 5.0000 9.0000 +%! 1.0000 1.5000 3.5000 7.0000 9.0000 +%! 1.0000 1.0000 2.0000 5.0000 9.0000 +%! 1.0000 1.0000 2.0000 5.0000 9.0000 +%! 1.0000 1.5000 3.5000 7.0000 9.0000 +%! 1.0000 1.2500 3.5000 8.0000 9.0000 +%! 1.0000 1.7500 3.5000 6.0000 9.0000 +%! 1.0000 1.4167 3.5000 7.3333 9.0000 +%! 1.0000 1.4375 3.5000 7.2500 9.0000]; +%! for m = (1:9) +%! q = quantile (x, p, 1, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [1; 2; 5; 9; 11]; +%! a = [1.0000 2.0000 5.0000 9.0000 11.0000 +%! 1.0000 2.0000 5.0000 9.0000 11.0000 +%! 1.0000 1.0000 2.0000 9.0000 11.0000 +%! 1.0000 1.2500 3.5000 8.0000 11.0000 +%! 1.0000 1.7500 5.0000 9.5000 11.0000 +%! 1.0000 1.5000 5.0000 10.0000 11.0000 +%! 1.0000 2.0000 5.0000 9.0000 11.0000 +%! 1.0000 1.6667 5.0000 9.6667 11.0000 +%! 1.0000 1.6875 5.0000 9.6250 11.0000]; +%! for m = (1:9) +%! q = quantile (x, p, 1, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [16; 11; 15; 12; 15; 8; 11; 12; 6; 10]; +%! a = [6.0000 10.0000 11.0000 15.0000 16.0000 +%! 6.0000 10.0000 11.5000 15.0000 16.0000 +%! 6.0000 8.0000 11.0000 15.0000 16.0000 +%! 6.0000 9.0000 11.0000 13.5000 16.0000 +%! 6.0000 10.0000 11.5000 15.0000 16.0000 +%! 6.0000 9.5000 11.5000 15.0000 16.0000 +%! 6.0000 10.2500 11.5000 14.2500 16.0000 +%! 6.0000 9.8333 11.5000 15.0000 16.0000 +%! 6.0000 9.8750 11.5000 15.0000 16.0000]; +%! for m = (1:9) +%! q = quantile (x, p, 1, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = [0.00, 0.25, 0.50, 0.75, 1.00]; +%! x = [-0.58851; 0.40048; 0.49527; -2.551500; -0.52057; ... +%! -0.17841; 0.057322; -0.62523; 0.042906; 0.12337]; +%! a = [-2.551474 -0.588505 -0.178409 0.123366 0.495271 +%! -2.551474 -0.588505 -0.067751 0.123366 0.495271 +%! -2.551474 -0.625231 -0.178409 0.123366 0.495271 +%! -2.551474 -0.606868 -0.178409 0.090344 0.495271 +%! -2.551474 -0.588505 -0.067751 0.123366 0.495271 +%! -2.551474 -0.597687 -0.067751 0.192645 0.495271 +%! -2.551474 -0.571522 -0.067751 0.106855 0.495271 +%! -2.551474 -0.591566 -0.067751 0.146459 0.495271 +%! -2.551474 -0.590801 -0.067751 0.140686 0.495271]; +%! for m = (1:9) +%! q = quantile (x, p, 1, m).'; +%! assert (q, a(m,:), 0.0001) +%! endfor + +%!test +%! p = 0.5; +%! x = [0.112600, 0.114800, 0.052100, 0.236400, 0.139300 +%! 0.171800, 0.727300, 0.204100, 0.453100, 0.158500 +%! 0.279500, 0.797800, 0.329600, 0.556700, 0.730700 +%! 0.428800, 0.875300, 0.647700, 0.628700, 0.816500 +%! 0.933100, 0.931200, 0.963500, 0.779600, 0.846100]; +%! tol = 0.00001; +%! x(5,5) = NaN; +%! assert (quantile(x, p, 1), [0.27950, 0.79780, 0.32960, 0.55670, 0.44460], tol); +%! x(1,1) = NaN; +%! assert (quantile(x, p, 1), [0.35415, 0.79780, 0.32960, 0.55670, 0.44460], tol); +%! x(3,3) = NaN; +%! assert (quantile(x, p, 1), [0.35415, 0.79780, 0.42590, 0.55670, 0.44460], tol); + +%!test +%! sx = [2, 3, 4]; +%! x = rand (sx); +%! dim = 2; +%! p = 0.5; +%! yobs = quantile (x, p, dim); +%! yexp = median (x, dim); +%! assert (yobs, yexp); + diff -r 9a4541c622b5 -r 0220da981c2a scripts/statistics/base/statistics.m --- a/scripts/statistics/base/statistics.m Tue Mar 25 23:06:45 2008 -0400 +++ b/scripts/statistics/base/statistics.m Tue Mar 25 23:57:49 2008 -0400 @@ -23,7 +23,8 @@ ## quartile, median, third quartile, maximum, mean, standard deviation, ## skewness and kurtosis of the columns of @var{x} as its rows. ## -## If @var{x} is a vector, treat it as a column vector. +## If @var{x} is a vector, calculate the statistics along the +## non-singleton dimension. ## @end deftypefn ## Author: KH @@ -59,25 +60,16 @@ error ("statistics: invalid argument"); endif - ## This code is a bit heavy, but is needed until empirical_inv - ## takes other than vector arguments. - c = sz(dim); - stride = prod (sz(1:dim-1)); - sz(dim) = 3; - emp_inv = zeros (sz); - for i = 1 : nel / c; - offset = i; - offset2 = 0; - while (offset > stride) - offset -= stride; - offset2++; - endwhile - rng = [0 : c-1] * stride + offset + offset2 * stride * c; - rng2 = [0 : 2] * stride + offset + offset2 * stride * 3; - emp_inv(rng2) = empirical_inv ([0.25; 0.5; 0.75], X(rng)); - endfor + emp_inv = quantile (X, [0.25; 0.5; 0.75], dim, 7); S = cat (dim, min (X, [], dim), emp_inv, max (X, [], dim), mean (X, dim), std (X, [], dim), skewness (X, dim), kurtosis (X, dim)); endfunction + +%!test +%! x = rand(7,5); +%! s = statistics (x); +%! m = median (x); +%! assert (m, s(3,:), eps); +