view scripts/statistics/base/__quantile__.m @ 8920:eb63fbe60fab

update copyright notices
author John W. Eaton <jwe@octave.org>
date Sat, 07 Mar 2009 10:41:27 -0500
parents 7d48766c21a5
children
line wrap: on
line source

## Copyright (C) 2008, 2009 Ben Abbott and Jaroslav Hajek
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{q} =} __quantile__ (@var{x}, @var{p})
## @deftypefnx {Function File} {@var{q} =} __quantile__ (@var{x}, @var{p}, @var{method})
## Undocumented internal function.
## @end deftypefn

## For the cumulative probability values in @var{p}, compute the 
## quantiles, @var{q} (the inverse of the cdf), for the sample, @var{x}.
##
## The optional input, @var{method}, refers to nine methods available in R
## (http://www.r-project.org/). The default is @var{method} = 5. For more
## detail, see `help quantile'.
## @seealso{prctile, quantile, statistics}

## Author: Ben Abbott <bpabbott@mac.com>
## Vectorized version: Jaroslav Hajek <highegg@gmail.com>
## Description: Quantile function of empirical samples

function inv = __quantile__ (x, p, method = 5)

  ## Compute, for every column of X, the sample quantiles at the
  ## cumulative probabilities P.
  ##
  ## Inputs:
  ##   x      - sample matrix.  It is sorted column-wise here; NaN entries
  ##            are not counted in a column's sample size.
  ##   p      - probabilities, any shape (flattened to a column vector).
  ##   method - integer 1..9 selecting the quantile definition (default 5).
  ##
  ## Output:
  ##   inv - numel (p)-by-columns (x) matrix.  Rows whose probability is
  ##         below 0 are -Inf and rows whose probability is above 1 are
  ##         +Inf.
  ##
  ## Errors:
  ##   - print_usage if called with fewer than 2 or more than 3 arguments.
  ##   - "quantile: x must be a matrix" if ismatrix (x) is false.
  ##   - "quantile: Unknown method, ..." if METHOD is not 1..9 (only checked
  ##     when at least one probability lies in [0, 1] and X has more than
  ##     one row).

  if (nargin < 2 || nargin > 3)
    print_usage ();
  endif

  if (! ismatrix (x))
    error ("quantile: x must be a matrix");
  endif

  ## Work with the probabilities as a column vector.
  p = p(:);

  ## Sort each column.  An ascending sort places NaN values last, so the
  ## first m(j) rows of column j hold its valid samples.
  x = sort (x);
  m = sum (! isnan (x));
  mx = size (x, 1);
  nx = size (x, 2);

  ## Initialize output values: -Inf for p < 0, +Inf for p > 1.  In-range
  ## rows start out as NaN (Inf*0) and are overwritten below.
  inv = Inf*(-(p < 0) + (p > 1));
  inv = repmat (inv, 1, nx);

  ## Do the work for the probabilities inside [0, 1].
  if (any (k = find ((p >= 0) & (p <= 1))))
    n = length (k);
    p = p(k);

    ## Special case: a single row, every quantile is that row itself.
    if (mx == 1)
      inv(k,:) = repmat (x, n, 1);
      return
    endif

    ## The column-distribution indices: linear-index offset of each
    ## column of x, replicated for every requested probability.
    pcd = kron (ones (n, 1), mx*(0:nx-1));
    ## Valid sample count of each column, replicated the same way.
    mm = kron (ones (n, 1), m);

    switch method
      case {1, 2, 3}
        ## Discontinuous methods: pick (or average) order statistics.
        switch method
          case 1
            p = max (ceil (kron (p, m)), 1);
            inv(k,:) = x(p + pcd);

          case 2
            p = kron (p, m);
            p_lr = max (ceil (p), 1);
            p_rl = min (floor (p + 1), mm);
            inv(k,:) = (x(p_lr + pcd) + x(p_rl + pcd))/2;

          case 3
            ## Used by SAS, method PCTLDEF=2.
            ## http://support.sas.com/onlinedoc/913/getDoc/en/statug.hlp/stdize_sect14.htm
            t = max (kron (p, m), 1);
            t = roundb (t);
            inv(k,:) = x(t + pcd);
        endswitch

      otherwise
        ## Continuous methods: compute a fractional position in the
        ## sorted sample, then interpolate linearly between neighbours.
        switch method
          case 4
            p = kron (p, m);

          case 5
            ## Used by Matlab.
            p = kron (p, m) + 0.5;

          case 6
            ## Used by Minitab and SPSS.
            p = kron (p, m+1);

          case 7
            ## Used by S and R.
            p = kron (p, m-1) + 1;

          case 8
            ## Median unbiased.
            p = kron (p, m+1/3) + 1/3;

          case 9
            ## Approximately unbiased respecting order statistics.
            p = kron (p, m+0.25) + 0.375;

          otherwise
            error ("quantile: Unknown method, '%d'", method);
        endswitch

        ## Duplicate single values so that the interpolation below never
        ## touches a NaN in row 2 of a column with only one valid sample.
        ## The mask must be built from the 1-by-nx count vector m: using
        ## the replicated n-by-nx matrix mm as a column index would be
        ## linearised and select the wrong columns whenever n > 1.
        single_cols = (m == 1);
        x(2,single_cols) = x(1,single_cols);

        ## Interval indices: lower neighbour (clamped to 1..m-1) and the
        ## interpolation weight (clamped to [0, 1]).
        lo = max (min (floor (p), mm-1), 1);
        frac = max (min (p - lo, 1), 0);
        lo += pcd;
        inv(k,:) = (1-frac) .* x(lo) + frac .* x(lo+1);
    endswitch
  endif

endfunction

%!test
%! p = 0.5;
%! x = sort (rand (11));
%! q = __quantile__ (x, p);
%! assert (q, x(6,:))

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [1; 2; 3; 4];
%! a = [1.0000   1.0000   2.0000   3.0000   4.0000
%!      1.0000   1.5000   2.5000   3.5000   4.0000
%!      1.0000   1.0000   2.0000   3.0000   4.0000
%!      1.0000   1.0000   2.0000   3.0000   4.0000
%!      1.0000   1.5000   2.5000   3.5000   4.0000
%!      1.0000   1.2500   2.5000   3.7500   4.0000
%!      1.0000   1.7500   2.5000   3.2500   4.0000
%!      1.0000   1.4167   2.5000   3.5833   4.0000
%!      1.0000   1.4375   2.5000   3.5625   4.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [1; 2; 3; 4; 5];
%! a = [1.0000   2.0000   3.0000   4.0000   5.0000
%!      1.0000   2.0000   3.0000   4.0000   5.0000
%!      1.0000   1.0000   2.0000   4.0000   5.0000
%!      1.0000   1.2500   2.5000   3.7500   5.0000
%!      1.0000   1.7500   3.0000   4.2500   5.0000
%!      1.0000   1.5000   3.0000   4.5000   5.0000
%!      1.0000   2.0000   3.0000   4.0000   5.0000
%!      1.0000   1.6667   3.0000   4.3333   5.0000
%!      1.0000   1.6875   3.0000   4.3125   5.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [1; 2; 5; 9];
%! a = [1.0000   1.0000   2.0000   5.0000   9.0000
%!      1.0000   1.5000   3.5000   7.0000   9.0000
%!      1.0000   1.0000   2.0000   5.0000   9.0000
%!      1.0000   1.0000   2.0000   5.0000   9.0000
%!      1.0000   1.5000   3.5000   7.0000   9.0000
%!      1.0000   1.2500   3.5000   8.0000   9.0000
%!      1.0000   1.7500   3.5000   6.0000   9.0000
%!      1.0000   1.4167   3.5000   7.3333   9.0000
%!      1.0000   1.4375   3.5000   7.2500   9.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [1; 2; 5; 9; 11];
%! a = [1.0000    2.0000    5.0000    9.0000   11.0000
%!      1.0000    2.0000    5.0000    9.0000   11.0000
%!      1.0000    1.0000    2.0000    9.0000   11.0000
%!      1.0000    1.2500    3.5000    8.0000   11.0000
%!      1.0000    1.7500    5.0000    9.5000   11.0000
%!      1.0000    1.5000    5.0000   10.0000   11.0000
%!      1.0000    2.0000    5.0000    9.0000   11.0000
%!      1.0000    1.6667    5.0000    9.6667   11.0000
%!      1.0000    1.6875    5.0000    9.6250   11.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [16; 11; 15; 12; 15;  8; 11; 12;  6; 10];
%! a = [6.0000   10.0000   11.0000   15.0000   16.0000
%!      6.0000   10.0000   11.5000   15.0000   16.0000
%!      6.0000    8.0000   11.0000   15.0000   16.0000
%!      6.0000    9.0000   11.0000   13.5000   16.0000
%!      6.0000   10.0000   11.5000   15.0000   16.0000
%!      6.0000    9.5000   11.5000   15.0000   16.0000
%!      6.0000   10.2500   11.5000   14.2500   16.0000
%!      6.0000    9.8333   11.5000   15.0000   16.0000
%!      6.0000    9.8750   11.5000   15.0000   16.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [-0.58851;  0.40048;  0.49527; -2.551500; -0.52057; ...
%!      -0.17841; 0.057322; -0.62523;  0.042906;  0.12337];
%! a = [-2.551474  -0.588505  -0.178409   0.123366   0.495271
%!      -2.551474  -0.588505  -0.067751   0.123366   0.495271
%!      -2.551474  -0.625231  -0.178409   0.123366   0.495271
%!      -2.551474  -0.606868  -0.178409   0.090344   0.495271
%!      -2.551474  -0.588505  -0.067751   0.123366   0.495271
%!      -2.551474  -0.597687  -0.067751   0.192645   0.495271
%!      -2.551474  -0.571522  -0.067751   0.106855   0.495271
%!      -2.551474  -0.591566  -0.067751   0.146459   0.495271
%!      -2.551474  -0.590801  -0.067751   0.140686   0.495271];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = 0.5;
%! x = [0.112600, 0.114800, 0.052100, 0.236400, 0.139300
%!      0.171800, 0.727300, 0.204100, 0.453100, 0.158500
%!      0.279500, 0.797800, 0.329600, 0.556700, 0.730700
%!      0.428800, 0.875300, 0.647700, 0.628700, 0.816500
%!      0.933100, 0.931200, 0.963500, 0.779600, 0.846100];
%! tol = 0.00001;
%! x(5,5) = NaN;
%! assert (__quantile__ (x, p), [0.27950, 0.79780, 0.32960, 0.55670, 0.44460], tol);
%! x(1,1) = NaN;
%! assert (__quantile__ (x, p), [0.35415, 0.79780, 0.32960, 0.55670, 0.44460], tol);
%! x(3,3) = NaN;
%! assert (__quantile__ (x, p), [0.35415, 0.79780, 0.42590, 0.55670, 0.44460], tol);