Mercurial > forge
changeset 5684:e692af2b7645 octave-forge
extended input arguments
author | schloegl |
---|---|
date | Mon, 25 May 2009 13:46:36 +0000 |
parents | be2ec0458410 |
children | 4263b7afe789 |
files | extra/NaN/inst/corrcoef.m extra/NaN/inst/iqr.m |
diffstat | 2 files changed, 74 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/extra/NaN/inst/corrcoef.m Mon May 25 08:04:36 2009 +0000 +++ b/extra/NaN/inst/corrcoef.m Mon May 25 13:46:36 2009 +0000 @@ -1,27 +1,39 @@ -function [R,sig,ci1,ci2,nan_sig] = corrcoef(X,Y,Mode); +function [R,sig,ci1,ci2,nan_sig] = corrcoef(X,Y,varargin); % CORRCOEF calculates the correlation matrix from pairwise correlations. % The input data can contain missing values encoded with NaN. % Missing data (NaN's) are handled by pairwise deletion [15]. % In order to avoid possible pitfalls, use case-wise deletion or % or check the correlation of NaN's with your data (see below). % A significance test for testing the Hypothesis -% "correlation coefficient R is significantly different to zero" +% 'correlation coefficient R is significantly different to zero' % is included. % -% [...] = CORRCOEF(X [,Mode]); +% [...] = CORRCOEF(X); % calculates the (auto-)correlation matrix of X -% [...] = CORRCOEF(X,Y [,Mode]); +% [...] = CORRCOEF(X,Y); % calculates the crosscorrelation between X and Y % +% [...] = CORRCOEF(..., Mode); % Mode='Pearson' or 'parametric' [default] % gives the correlation coefficient -% also known as the "product-moment coefficient of correlation" -% or "Pearson's correlation" [1] -% Mode='Spearman' gives "Spearman's Rank Correlation Coefficient" +% also known as the 'product-moment coefficient of correlation' +% or 'Pearson''s correlation' [1] +% Mode='Spearman' gives 'Spearman''s Rank Correlation Coefficient' % This replaces SPEARMAN.M % Mode='Rank' gives a nonparametric Rank Correlation Coefficient % This replaces RANKCORR.M % +% [...] = CORRCOEF(..., param1, value1, param2, value2, ... ); +% param value +% 'Mode' type of correlation +% 'Pearson','parametric' +% 'Spearman' +% 'rank' +% 'rows' how do deal with missing values encoded as NaN's. +% 'complete': remove all rows with at least one NaN +% 'pairwise': [default] +% 'alpha' 0.01 : significance level to compute confidence interval +% % [R,p,ci1,ci2,nansig] = CORRCOEF(...); % R is the correlation matrix % R(i,j) is the correlation coefficient r between X(:,i) and Y(:,j) @@ -29,13 +41,13 @@ % It tests the null hypothesis that the product moment correlation coefficient is zero % using Student's t-test on the statistic t = r*sqrt(N-2)/sqrt(1-r^2) % where N is the number of samples (Statistics, M. Spiegel, Schaum series). -% p > alpha: do not reject the Null hypothesis: "R is zero". -% p < alpha: The alternative hypothesis "R is larger than zero" is true with probability (1-alpha). +% p > alpha: do not reject the Null hypothesis: 'R is zero'. +% p < alpha: The alternative hypothesis 'R is larger than zero' is true with probability (1-alpha). % ci1 lower (1-alpha) confidence interval % ci2 upper (1-alpha) confidence interval -% The default alpha is 0.01, and can be changed with function flag_implicit_significance. -% nan_sig p-value whether H0: "NaN's are not correlated" could be correct -% if nan_sig < alpha, H1 ("NaNs are correlated") is very likely. +% If no alpha is provided, the default alpha is 0.01. This can be changed with function flag_implicit_significance. +% nan_sig p-value whether H0: 'NaN''s are not correlated' could be correct +% if nan_sig < alpha, H1 ('NaNs are correlated') is very likely. % % The result is only valid if the occurence of NaN's is uncorrelated. In % order to avoid this pitfall, the correlation of NaN's should be checked @@ -52,7 +64,7 @@ % Further recommandation related to the correlation coefficient: % + LOOK AT THE SCATTERPLOTS to make sure that the relationship is linear % + Correlation is not causation because -% it is not clear which parameter is "cause" and which is "effect" and +% it is not clear which parameter is 'cause' and which is 'effect' and % the observed correlation between two variables might be due to the action of other, unobserved variables. % % see also: SUMSKIPNAN, COVM, COV, COR, SPEARMAN, RANKCORR, RANKS, @@ -71,7 +83,7 @@ % [12] http://www.janda.org/c10/Lectures/topic06/L24-significanceR.htm % [13] http://faculty.vassar.edu/lowry/ch4apx.html % [14] http://davidmlane.com/hyperstat/B134689.html -% [15] http://www.statsoft.com/textbook/stbasic.html#Correlations +% [15] http://www.statsoft.com/textbook/stbasic.html%Correlations % others % [20] http://www.tufts.edu/~gdallal/corr.htm @@ -110,23 +122,41 @@ global FLAG_NANS_OCCURED; NARG = nargout; % needed because nargout is not reentrant in Octave, and corrcoef is recursive +mode = []; if nargin==1 Y = []; Mode='Pearson'; elseif nargin==0 fprintf(2,'Error CORRCOEF: Missing argument(s)\n'); -elseif nargin==2 +elseif nargin>1 if ischar(Y) - Mode=Y; + varg = [Y,varargin]; Y=[]; + else + varg = varargin; + end; + + if length(varg)<1, + Mode = 'Pearson'; + elseif length(varg)==1, + Mode = varg{1}; + end; + + for k=2:2:length(varg), + mode=setfield(mode,lower(varg{k-1}),varg{k}); + end; + if isfield(mode,'mode') + Mode = mode.mode; else - Mode='Pearson'; - end; + Mode = 'Pearson'; + end; end; Mode=[Mode,' ']; -FLAG_WARNING = warning; % save warning status + + +%FLAG_WARNING = warning; % save warning status warning('off'); [r1,c1]=size(X); @@ -146,7 +176,23 @@ YESNAN = any(isnan(X(:))) | any(isnan(Y(:))); if YESNAN, FLAG_NANS_OCCURED=(1==1); -end; + if isfield(mode,'rows') + if strcmp(mode.rows,'complete') + ix = ~any([X,Y],2); + X = X(ix,:); + if ~isempty(Y) + Y = Y(ix,:); + end; + YESNAN = 0; + NN = size(X,1); + elseif strcmp(mode.rows,'all') + fprintf(1,'Warning: data contains NaNs, rows=pairwise is used.'); + %%NN(NN < size(X,1)) = NaN; + elseif strcmp(mode.rows,'pairwise') + %%% default + end; + end; +end; if isempty(Y), IX = ones(c1)-diag(ones(c1,1)); [jx, jy ] = find(IX); @@ -257,13 +303,15 @@ end; if (NARG<2), - warning(FLAG_WARNING); % restore warning status +% warning(FLAG_WARNING); % restore warning status return; end; % CONFIDENCE INTERVAL -if exist('flag_implicit_significance')==2, +if isfield(mode,'alpha') + alpha = mode.alpha; +elseif exist('flag_implicit_significance')==2, alpha = flag_implicit_significance; else alpha = 0.01; @@ -304,7 +352,7 @@ %ci1(isnan(ci1))=R(isnan(ci1)); % in case of isnan(ci), the interval limits are exactly the R value %ci2(isnan(ci2))=R(isnan(ci2)); -if (NARG<5) || ~YESNAN, +if (NARG<5) | ~YESNAN, nan_sig = repmat(NaN,size(R)); warning(FLAG_WARNING); % restore warning status return; @@ -329,5 +377,6 @@ %%%%% ----- end of independence check ------ -warning(FLAG_WARNING); % restore warning status +%warning(FLAG_WARNING); % restore warning status return; +