Mercurial > octave
changeset 23273:87b6f3606fd4
corrcoef.m: New statistics function for Matlab compatibility (bug #47824).
* NEWS: Announce new function.
* scripts/statistics/base/module.mk: Add to build system.
* stats.txi: Add function to manual.
author | Guillaume Flandin |
---|---|
date | Mon, 13 Mar 2017 11:39:55 -0700 |
parents | 3983ac6f5920 |
children | 9cb69973997d |
files | NEWS doc/interpreter/stats.txi scripts/statistics/base/corrcoef.m scripts/statistics/base/module.mk |
diffstat | 4 files changed, 257 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/NEWS Sun Mar 12 20:56:44 2017 +0100 +++ b/NEWS Mon Mar 13 11:39:55 2017 -0700 @@ -20,6 +20,7 @@ ** Other new functions added in 4.4: + corrcoef gsvd hgtransform
--- a/doc/interpreter/stats.txi Sun Mar 12 20:56:44 2017 +0100 +++ b/doc/interpreter/stats.txi Mon Mar 13 11:39:55 2017 -0700 @@ -167,6 +167,8 @@ @DOCSTRING(corr) +@DOCSTRING(corrcoef) + @DOCSTRING(spearman) @DOCSTRING(kendall)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/statistics/base/corrcoef.m Mon Mar 13 11:39:55 2017 -0700 @@ -0,0 +1,253 @@ +## Copyright (C) 2016-2017 Guillaume Flandin +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. +## your option) any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, see +## <http://www.gnu.org/licenses/>. + +## -*- texinfo -*- +## @deftypefn {} {@var{r} =} corrcoef (@var{x}) +## @deftypefnx {} {@var{r} =} corrcoef (@var{x}, @var{y}) +## @deftypefnx {} {[@var{r}, @var{p}] =} corrcoef (@dots{}) +## @deftypefnx {} {[@var{r}, @var{p}, @var{lci}, @var{hci}] =} corrcoef (@dots{}) +## @deftypefnx {} {[@dots{}] =} corrcoef (@dots{}, @var{param}, @var{value}, @dots{}) +## Compute a matrix of correlation coefficients. +## +## @var{x} is an array where each column contains a variable and each row is +## an observation. +## +## If a second input @var{y} (of the same size as @var{x}) is given then +## calculate the correlation coefficients between @var{x} and @var{y}. +## +## @var{r} is a matrix of Pearson's product moment correlation coefficients for +## each pair of variables. +## +## @var{p} is a matrix of pair-wise p-values testing for the null hypothesis of +## a correlation coefficient of zero. +## +## @var{lci} and @var{hci} are matrices containing, respectively, the lower and +## higher bounds of the 95% confidence interval of each correlation +## coefficient. +## +## @var{param}, @var{value} are pairs of optional parameters and values. +## Valid options are: +## +## @table @asis +## @item @qcode{"alpha"} +## Confidence level used for the definition of the bounds of the confidence +## interval, @var{lci} and @var{hci}. Default is 0.05, i.e., 95% confidence +## interval. +## +## @item @qcode{"rows"} +## Determine processing of NaN values. Acceptable values are @qcode{"all"}, +## @qcode{"complete"}, and @qcode{"pairwise"}. Default is @qcode{"all"}. +## With @qcode{"complete"}, only the rows without NaN values are considered. +## With @qcode{"pairwise"}, the selection of NaN-free rows is made for each +## pair of variables. +## +## @end table +## +## @seealso{corr, cov, cor_test} +## @end deftypefn + +## FIXME: It would be good to add a definition of the calculation method +## for a Pearson product moment correlation to the documentation. + +function [r, p, lci, hci] = corrcoef (x, varargin) + + if (nargin == 0) + print_usage (); + endif + + alpha = 0.05; + rows = "all"; + + if (nargin > 1) + + ## Check for numeric y argument + if (isnumeric (varargin{1})) + x = [x(:), varargin{1}(:)]; + varargin(1) = []; + endif + + ## Check for Parameter/Value arguments + for i = 1:2:numel (varargin) + + if (! ischar (varargin{i})) + error ("corrcoef: parameter %d must be a string", i); + endif + parameter = varargin{i}; + if (numel (varargin) < i+1) + error ('corrcoef: parameter "%s" missing value', parameter); + endif + value = varargin{i+1}; + + switch (tolower (parameter)) + case "alpha" + if (isnumeric (value) && isscalar (value) + && value >= 0 && value <= 1) + alpha = value; + else + error ('corrcoef: "alpha" must be a number between 0 and 1'); + endif + + case "rows" + if (! ischar (value)) + error ('corrcoef: "rows" value must be a string'); + endif + value = tolower (value); + switch (value) + case {"all", "complete", "pairwise"} + rows = value; + otherwise + error ('corrcoef: "rows" must be "all", "complete", or "pairwise".'); + endswitch + + otherwise + error ('corrcoef: Unknown option "%s"', parameter); + + endswitch + endfor + endif + + if (strcmp (rows, "complete")) + x(any (isnan (x), 2), :) = []; + endif + + if (isempty (x) || isscalar (x)) + r = p = lci = hci = NaN; + return; + endif + + ## Flags for calculation + pairwise = strcmp (rows, "pairwise"); + calc_pval = nargout > 1; + + if (isrow (x)) + x = x(:); + endif + [m, n] = size (x); + r = eye (n); + if (calc_pval) + p = eye (n); + endif + if (strcmp (rows, "pairwise")) + mpw = m * ones (n); + endif + for i = 1:n + if (! pairwise && any (isnan (x(:,i)))) + r(i,i) = NaN; + if (nargout > 1) + p(i,i) = NaN; + endif + endif + for j = i+1:n + xi = x(:,i); + xj = x(:,j); + if (pairwise) + idx = any (isnan ([xi xj]), 2); + xi(idx) = xj(idx) = []; + mpw(i,j) = mpw(j,i) = m - nnz (idx); + endif + r(i,j) = r(j,i) = corr (xi, xj); + if (calc_pval) + T = cor_test (xi, xj, "!=", "pearson"); + p(i,j) = p(j,i) = T.pval; + endif + endfor + endfor + + if (nargout > 2) + if (pairwise) + m = mpw; + endif + CI = sqrt (2) * erfinv (1-alpha) ./ sqrt (m-3); + lci = tanh (atanh (r) - CI); + hci = tanh (atanh (r) + CI); + endif + +endfunction + + +%!test +%! x = rand (5); +%! r = corrcoef (x); +%! assert (size (r) == [5, 5]); + +%!test +%! x = [1 2 3]; +%! r = corrcoef (x); +%! assert (size (r) == [1, 1]); + +%!test +%! x = []; +%! r = corrcoef (x); +%! assert (isnan (r)); + +%!test +%! x = [NaN]; +%! r = corrcoef (x); +%! assert (isnan (r)); + +%!test +%! x = [1]; +%! r = corrcoef (x); +%! assert (isnan (r)); + +%!test +%! x = [NaN NaN]; +%! r = corrcoef (x); +%! assert (size(r) == [1, 1] && isnan (r)); + +%!test +%! x = rand (5); +%! [r, p] = corrcoef (x); +%! assert (size (r) == [5, 5] && size (p) == [5 5]); + +%!test +%! x = rand (5,1); +%! y = rand (5,1); +%! R1 = corrcoef (x, y); +%! R2 = corrcoef ([x, y]); +%! assert (R1, R2); + +%!test +%! x = [1;2;3]; +%! y = [1;2;3]; +%! r = corrcoef (x, y); +%! assert (r, ones (2,2)); + +%!test +%! x = [1;2;3]; +%! y = [3;2;1]; +%! r = corrcoef (x, y); +%! assert (r, [1, -1; -1, 1]); + +%!test +%! x = [1;2;3]; +%! y = [1;1;1]; +%! r = corrcoef (x, y); +%! assert (r, [1, NaN; NaN, 1]); + +%!test +%!error corrcoef () +%!error <parameter 1 must be a string> corrcoef (1, 2, 3) +%!error <parameter "alpha" missing value> corrcoef (1, 2, "alpha") +%!error <"alpha" must be a number> corrcoef (1,2, "alpha", "1") +%!error <"alpha" must be a number> corrcoef (1,2, "alpha", ones (2,2)) +%!error <"alpha" must be a number between 0 and 1> corrcoef (1,2, "alpha", -1) +%!error <"alpha" must be a number between 0 and 1> corrcoef (1,2, "alpha", 2) +%!error <"rows" must be "all"...> corrcoef (1,2, "rows", "foobar") +%!error <Unknown option "foobar"> corrcoef (1,2, "foobar", 1)
--- a/scripts/statistics/base/module.mk Sun Mar 12 20:56:44 2017 +0100 +++ b/scripts/statistics/base/module.mk Mon Mar 13 11:39:55 2017 -0700 @@ -4,6 +4,7 @@ scripts/statistics/base/center.m \ scripts/statistics/base/cloglog.m \ scripts/statistics/base/corr.m \ + scripts/statistics/base/corrcoef.m \ scripts/statistics/base/cov.m \ scripts/statistics/base/gls.m \ scripts/statistics/base/histc.m \