changeset 12423:06c3bbaa70b4 octave-forge

Initial commit of ttest, ttest2 and ztest.
author asnelt
date Sun, 13 Apr 2014 10:21:30 +0000
parents fe88d580dd72
children f54a41e480e2
files main/statistics/INDEX main/statistics/NEWS main/statistics/inst/ttest.m main/statistics/inst/ttest2.m main/statistics/inst/ztest.m
diffstat 5 files changed, 500 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/main/statistics/INDEX	Tue Apr 08 20:40:27 2014 +0000
+++ b/main/statistics/INDEX	Sun Apr 13 10:21:30 2014 +0000
@@ -72,6 +72,9 @@
 Hypothesis testing
  anderson_darling_test
  runstest
+ ttest
+ ttest2
+ ztest
 Fitting
  gamfit
 Clustering
--- a/main/statistics/NEWS	Tue Apr 08 20:40:27 2014 +0000
+++ b/main/statistics/NEWS	Sun Apr 13 10:21:30 2014 +0000
@@ -7,7 +7,7 @@
 
  ** The following functions are new:
  
-    cdf
+    cdf ttest ttest2 ztest
 
 Summary of important user-visible changes for statistics 1.2.3:
 -------------------------------------------------------------------
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/statistics/inst/ttest.m	Sun Apr 13 10:21:30 2014 +0000
@@ -0,0 +1,172 @@
+## Copyright (C) 2014 Tony Richardson
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} 
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest (@var{x})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest (@var{x}, @var{m})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest (@var{x}, @var{y})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest (..., @var{alpha})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest (...,  @var{alpha}, @var{tail})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest (...,  @var{alpha}, @var{tail}, @var{dim})
+##
+## Perform a T-test of the null hypothesis @code{mean (@var{x}) ==
+## @var{m}} for a sample @var{x} from a normal distribution with unknown
+## mean and unknown std deviation.  Under the null, the test statistic
+## @var{t} has a Student's t distribution.
+##
+## If the second argument @var{y} is a vector, a paired-t test of the
+## hypothesis mean(x) = mean(y) is performed.
+##
+## The argument @var{alpha} can be used to specify the significance level
+## of the test (the default value is 0.05).  The string
+## argument @var{tail}, can be used to select the desired alternative
+## hypotheses.  If @var{alt} is @qcode{"both"} (default) the null is 
+## tested against the two-sided alternative @code{mean (@var{x}) != @var{m}}.
+## If @var{alt} is @qcode{"right"} the one-sided 
+## alternative @code{mean (@var{x}) > @var{m}} is considered.
+## Similarly for @qcode{"left"}, the one-sided alternative @code{mean
+## (@var{x}) < @var{m}} is considered.  When argument @var{x} is a matrix
+## the @var{dim} argument can be used to selection the dimension over 
+## which to perform the test.  (The default is the first non-singleton
+## dimension.)
+##
+## If @var{h} is 0 the null hypothesis is accepted, if it is 1 the null
+## hypothesis is rejected. The p-value of the test is returned in @var{pval}.
+## A 100(1-alpha)% confidence interval is returned in @var{ci}. @var{stats}
+## is a structure containing the value of the test statistic (@var{tstat}),
+## the degrees of freedom (@var{df}) and the sample standard deviation
+## (@var{sd}).
+##
+## @end deftypefn
+
+## Author: Tony Richardson <richardson.tony@gmail.com>
+## Description: Hypothesis test for mean of a normal sample with unknown variance
+
+function [h, p, ci, stats] = ttest(x, my, alpha, tail, dim)
+  
+  my_default = 0;
+  alpha_default = 0.05;
+  tail_default  = 'both';
+
+  % Find the first non-singleton dimension of x
+  dim_default = min(find(size(x)~=1));
+  if isempty(dim_default), dim_default = 1; end
+
+  % Set the default argument values if input arguments are not present  
+  switch (nargin)
+    case 1
+      my = my_default;
+      alpha = alpha_default;
+      tail = tail_default;
+      dim = dim_default;
+    case 2
+      alpha = alpha_default;
+      tail = tail_default;
+      dim = dim_default;
+    case 3  
+      tail = tail_default;
+      dim = dim_default;
+    case 4
+      dim = dim_default;
+    case 5
+      % Do nothing here.
+      % This is a valid case
+    otherwise
+      err_msg = 'Invalid call to ttest. Correct usage is:';
+      err_msg = [err_msg '\n\n     ttest(x, m) or ttest(x, y)\n\n'];
+      error(err_msg,[]);
+  end
+  
+  if any(and(~isscalar(my),size(x)~=size(my)))
+    error('Arrays in paired test must be the same size.');
+  end
+
+  % Set default values if arguments are present but empty
+  if isempty(my)
+    my = my_default;
+  end
+  if isempty(alpha)
+    alpha = alpha_default;
+  end
+  if isempty(tail)
+    tail = tail_default;
+  end
+  if isempty(dim)
+    dim = dim_default;
+  end
+  
+  if ~isa(tail, 'char')
+    error('Fifth argument to ttest must be a string\n',[]);
+  end
+  
+  % This adjustment allows everything else to remain the
+  % same for both the one-sample t test and paired tests.
+  x = x - my;
+  
+  % Calculate the test statistic value (tval)
+  n = size(x, dim);
+  x_bar = mean(x, dim);
+  stats.tstat = 0;
+  stats.df = n-1;
+  stats.sd = std(x, 0, dim);
+  x_bar_std = stats.sd/sqrt(n);
+  tval = (x_bar)./x_bar_std;
+  stats.tstat = tval;
+
+  % Based on the "tail" argument determine the P-value, the critical values,
+  % and the confidence interval.
+  switch lower(tail)
+    case 'both'
+      p = 2*(1 - tcdf(abs(tval),n-1));
+      tcrit = -tinv(alpha/2,n-1);
+      ci = [x_bar-tcrit*x_bar_std; x_bar+tcrit*x_bar_std];
+    case 'left'
+      p = tcdf(tval,n-1);
+      tcrit = -tinv(alpha,n-1);
+      ci = [-inf*ones(size(x_bar)); x_bar+tcrit*x_bar_std];
+    case 'right'
+      p = 1 - tcdf(tval,n-1);
+      tcrit = -tinv(alpha,n-1);
+      ci = [x_bar-tcrit*x_bar_std; inf*ones(size(x_bar))];
+    otherwise
+      error('Invalid fifth (tail) argument to ttest\n',[]);
+  end
+
+  % Reshape the ci array to match MATLAB shaping
+  if and(isscalar(x_bar), dim==2)
+    ci = ci(:)';
+  elseif size(x_bar,2)<size(x_bar,1)
+    ci = reshape(ci(:),length(x_bar),2);
+  end
+
+  % Determine the test outcome
+  % MATLAB returns this a double instead of a logical array
+  h = double(p < alpha);  
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/statistics/inst/ttest2.m	Sun Apr 13 10:21:30 2014 +0000
@@ -0,0 +1,183 @@
+## Copyright (C) 2014 Tony Richardson
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} 
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest2 (@var{x}, @var{y})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest2 (@var{x}, @var{y}, @var{alpha})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest2 (@var{x}, @var{y},  @var{alpha}, @var{tail})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest2 (@var{x}, @var{y},  @var{alpha}, @var{tail}, @var{vartype})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{stats} ] =} 
+## ttest2 (@var{x}, @var{y},  @var{alpha}, @var{tail}, @var{vartype}, @var{dim})
+##
+## Perform a T-test of the null hypothesis @code{mean (@var{x}) ==
+## @var{m}} for a sample @var{x} from a normal distribution with unknown
+## mean and unknown std deviation.  Under the null, the test statistic
+## @var{t} has a Student's t distribution.
+##
+## If the second argument @var{y} is a vector, a paired-t test of the
+## hypothesis mean(x) = mean(y) is performed.
+##
+## The argument @var{alpha} can be used to specify the significance level
+## of the test (the default value is 0.05).  The string
+## argument @var{tail}, can be used to select the desired alternative
+## hypotheses.  If @var{alt} is @qcode{"both"} (default) the null is 
+## tested against the two-sided alternative @code{mean (@var{x}) != @var{m}}.
+## If @var{alt} is @qcode{"right"} the one-sided 
+## alternative @code{mean (@var{x}) > @var{m}} is considered.
+## Similarly for @qcode{"left"}, the one-sided alternative @code{mean
+## (@var{x}) < @var{m}} is considered.  When @var{vartype} is 'equal'
+## the variances are assumed to be equal (this is the default).  When
+## @var{vartype} is 'unequal' the variances are not assumed equal.
+## When argument @var{x} is a matrix the @var{dim} argument can be 
+## used to selection the dimension over which to perform the test.
+## (The default is the first non-singleton dimension.)
+##
+## If @var{h} is 0 the null hypothesis is accepted, if it is 1 the null
+## hypothesis is rejected. The p-value of the test is returned in @var{pval}.
+## A 100(1-alpha)% confidence interval is returned in @var{ci}. @var{stats}
+## is a structure containing the value of the test statistic (@var{tstat}),
+## the degrees of freedom (@var{df}) and the sample standard deviation
+## (@var{sd}).
+##
+## @end deftypefn
+
+## Author: Tony Richardson <richardson.tony@gmail.com>
+## Description: Two sample Hypothesis test for mean of a normal sample with unknown variance
+
+function [h, p, ci, stats] = ttest2(x, y, alpha, tail, vartype, dim)
+  
+  alpha_default = 0.05;
+  tail_default  = 'both';
+  vartype_default = 'equal';
+
+  % Find the first non-singleton dimension of x
+  dim_default = min(find(size(x)~=1));
+  if isempty(dim_default), dim_default = 1; end
+
+  % Set the default argument values if input arguments are not present  
+  switch (nargin)
+    case 2
+      alpha = alpha_default;
+      tail = tail_default;
+      vartype = vartype_default;
+      dim = dim_default;
+    case 3  
+      tail = tail_default;
+      vartype = vartype_default;
+      dim = dim_default;
+    case 4
+      vartype = vartype_default;
+      dim = dim_default;
+    case 5
+      dim = dim_default;
+    case 6
+      % Do nothing here.
+      % This is a valid case
+    otherwise
+      err_msg = 'Invalid call to ttest2. Correct usage is:';
+      err_msg = [err_msg '\n\n     ttest2(x, y)\n\n'];
+      error(err_msg,[]);
+  end
+  
+  % Set default values if arguments are present but empty
+  if isempty(alpha)
+    alpha = alpha_default;
+  end
+  if isempty(tail)
+    tail = tail_default;
+  end
+  if isempty(vartype)
+    vartype = vartype_default;
+  end
+  if isempty(dim)
+    dim = dim_default;
+  end
+  
+  if ~isa(tail, 'char')
+    error('Fourth argument to ttest2 must be a string\n',[]);
+  end
+  
+  m = size(x, dim);
+  n = size(y, dim);
+  x_bar = mean(x,dim)-mean(y,dim);
+  s1_var = var(x, 0, dim);
+  s2_var = var(y, 0, dim);
+
+  switch lower(vartype)
+    case 'equal'
+      stats.tstat = 0;
+      stats.df = (m + n - 2)*ones(size(x_bar));
+      sp_var = ((m-1)*s1_var + (n-1)*s2_var)./stats.df;
+      stats.sd = sqrt(sp_var);
+      x_bar_std = sqrt(sp_var*(1/m+1/n));
+    case 'unequal'
+      stats.tstat = 0;
+      se1 = sqrt(s1_var/m);
+      se2 = sqrt(s2_var/n);
+      sp_var = s1_var/m + s2_var/n;
+      stats.df = ((se1.^2+se2.^2).^2 ./ (se1.^4/(m-1) + se2.^4/(n-1)));
+      stats.sd = [sqrt(s1_var); sqrt(s2_var)];
+      x_bar_std = sqrt(sp_var);
+    otherwise
+      error('Invalid fifth (vartype) argument to ttest2\n',[]);
+  end
+
+  stats.tstat = x_bar./x_bar_std;
+
+      % Based on the "tail" argument determine the P-value, the critical values,
+  % and the confidence interval.
+  switch lower(tail)
+    case 'both'
+      p = 2*(1 - tcdf(abs(stats.tstat),stats.df));
+      tcrit = -tinv(alpha/2,stats.df);
+      %ci = [x_bar-tcrit*stats.sd; x_bar+tcrit*stats.sd];
+      ci = [x_bar-tcrit.*x_bar_std; x_bar+tcrit.*x_bar_std];
+    case 'left'
+      p = tcdf(stats.tstat,stats.df);
+      tcrit = -tinv(alpha,stats.df);
+      ci = [-inf*ones(size(x_bar)); x_bar+tcrit.*x_bar_std];
+    case 'right'
+      p = 1 - tcdf(stats.tstat,stats.df);
+      tcrit = -tinv(alpha,stats.df);
+      ci = [x_bar-tcrit.*x_bar_std; inf*ones(size(x_bar))];
+    otherwise
+      error('Invalid fourth (tail) argument to ttest2\n',[]);
+  end
+
+  % Reshape the ci array to match MATLAB shaping
+  if and(isscalar(x_bar), dim==2)
+    ci = ci(:)';
+    stats.sd = stats.sd(:)';
+  elseif size(x_bar,2)<size(x_bar,1)
+    ci = reshape(ci(:),length(x_bar),2);
+    stats.sd = reshape(stats.sd(:),length(x_bar),2);
+  end
+
+  % Determine the test outcome
+  % MATLAB returns this a double instead of a logical array
+  h = double(p < alpha);  
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/statistics/inst/ztest.m	Sun Apr 13 10:21:30 2014 +0000
@@ -0,0 +1,141 @@
+## Copyright (C) 2014 Tony Richardson
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} 
+## {[@var{h}, @var{pval}, @var{ci}, @var{z}, @var{zcrit} ] =} 
+## ztest (@var{x}, @var{m}, @var{s})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{z}, @var{zcrit} ] =} 
+## ztest (@var{x}, @var{m}, @var{s}, @var{alpha})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{z}, @var{zcrit} ] =} 
+## ztest (@var{x}, @var{m}, @var{s}, @var{alpha}, @var{tail})
+##
+## {[@var{h}, @var{pval}, @var{ci}, @var{z}, @var{zcrit} ] =} 
+## ztest (@var{x}, @var{m}, @var{s}, @var{alpha}, @var{tail}, @var{dim})
+##
+## Perform a Z-test of the null hypothesis @code{mean (@var{x}) ==
+## @var{m}} for a sample @var{x} from a normal distribution with unknown
+## mean and known std deviation @var{s}.  Under the null, the test statistic
+## @var{z} follows a standard normal distribution.
+##
+## The argument @var{alpha} can be used to specify the significance level
+## of the test (the default value is 0.05).  The string
+## argument @var{tail}, can be used to select the desired alternative
+## hypotheses.  If @var{alt} is @qcode{"both"} (default) the null is 
+## tested against the two-sided alternative @code{mean (@var{x}) != @var{m}}.
+## If @var{alt} is @qcode{"right"} the one-sided 
+## alternative @code{mean (@var{x}) > @var{m}} is considered.
+## Similarly for @qcode{"left"}, the one-sided alternative @code{mean
+## (@var{x}) < @var{m}} is considered.  When argument @var{x} is a matrix
+## the @var{dim} argument can be used to selection the dimension over 
+## which to perform the test.  (The default is the first non-singleton
+## dimension.)
+##
+## If @var{h} is 0 the null hypothesis is accepted, if it is 1 the null
+## hypothesis is rejected. The p-value of the test is returned in @var{pval}.
+%% A 100(1-alpha)% confidence interval is returned in @var{ci}.  The test statistic
+## value is returned in @var{z} and the z critical value in @var{zcrit}.
+##
+## @end deftypefn
+
+## Author: Tony Richardson <richardson.tony@gmail.com>
+## Description: Hypothesis test for mean of a normal sample with known variance
+
+function [h, p, ci, zval, zcrit] = ztest(x, m, sigma, alpha, tail, dim)
+  
+  alpha_default = 0.05;
+  tail_default  = 'both';
+
+  % Find the first non-singleton dimension of x
+  dim_default = min(find(size(x)~=1));
+  if isempty(dim_default), dim_default = 1; end
+  
+  % Set the default argument values if input arguments are not present
+  switch (nargin)
+    case 3
+      alpha = alpha_default;
+      tail = tail_default;
+      dim = dim_default;
+    case 4  
+      tail = tail_default;
+      dim = dim_default;
+    case 5
+      dim = dim_default;
+    case 6
+      % Do nothing here.
+      % This is a valid case
+    otherwise
+      err_msg = 'Invalid call to ztest. Correct usage is:';
+      err_msg = [err_msg '\n\n     ztest(x, m, sigma)\n\n'];
+      error(err_msg,[]);
+  end
+  
+  % Set default values if arguments are present but empty
+  if isempty(alpha)
+    alpha = alpha_default;
+  end
+  if isempty(tail)
+    tail = tail_default;
+  end
+  if isempty(dim)
+    dim = dim_default;
+  end
+  
+  if ~isa(tail, 'char')
+    error('Fifth argument to ztest must be a string\n',[]);
+  end
+  
+  % Calculate the test statistic value (zval)
+  n = size(x, dim);
+  x_bar = mean(x, dim);
+  x_bar_std = sigma/sqrt(n);
+  zval = (x_bar - m)./x_bar_std;
+  
+  % Based on the "tail" argument determine the P-value, the critical values,
+  % and the confidence interval.
+  switch lower(tail)
+    case 'both'
+      p = 2*(1 - normcdf(abs(zval)));
+      zcrit = -norminv(alpha/2);
+      ci = [x_bar-zcrit*x_bar_std; x_bar+zcrit*x_bar_std];
+    case 'left'
+      p = normcdf(zval);
+      zcrit = -norminv(alpha);
+      ci = [-inf*ones(size(x_bar)); x_bar+zcrit*x_bar_std];
+    case 'right'
+      p = 1 - normcdf(zval);
+      zcrit = -norminv(alpha);
+      ci = [x_bar-zcrit*x_bar_std; inf*ones(size(x_bar))];
+    otherwise
+      error('Invalid fifth (tail) argument to ztest\n',[]);
+  end
+  
+  % Reshape the ci array to match MATLAB shaping
+  if and(isscalar(x_bar), dim==2)
+    ci = ci(:)';
+  elseif size(x_bar,2)<size(x_bar,1)
+    ci = reshape(ci(:),length(x_bar),2);
+  end
+
+  % Determine the test outcome
+  % MATLAB returns this a double instead of a logical array
+  h = double(p < alpha);
+  
+end