view scripts/statistics/corr.m @ 31918:e67b7b85670b

cov.m: Overhaul function for matlab compatibility (bug #50583). * cov.m: Change two-array input handling such that separate x and y inputs are treated as two univariate distributions equivalent to cov (x(:), y(:)). Implement NANFLAG option for selectively ignoring NaN data elements. Correct empty input handling to produce Matlab compatible output. Add BISTs for new features and expand input validation tests. Update docstring to address new behaviors and add note about backwards incompatibility. * corr.m, corrcoef.m: Adapt calls to cov to account for the increase in cov return size. * etc/NEWS.9.md: Add note of changes to Matlab Compatibility section.
author Nicholas R. Jankowski <jankowski.nicholas@gmail.com>
date Wed, 22 Mar 2023 13:33:46 -0400
parents 597f3ee61a48
children 37e184a83cf4
line wrap: on
line source

########################################################################
##
## Copyright (C) 1996-2023 The Octave Project Developers
##
## See the file COPYRIGHT.md in the top-level directory of this
## distribution or <https://octave.org/copyright/>.
##
## This file is part of Octave.
##
## Octave is free software: you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <https://www.gnu.org/licenses/>.
##
########################################################################

## -*- texinfo -*-
## @deftypefn  {} {@var{r} =} corr (@var{x})
## @deftypefnx {} {@var{r} =} corr (@var{x}, @var{y})
## Compute matrix of correlation coefficients.
##
## If each row of @var{x} and @var{y} is an observation and each column is
## a variable, then the @w{(@var{i}, @var{j})-th} entry of
## @code{corr (@var{x}, @var{y})} is the correlation between the
## @var{i}-th variable in @var{x} and the @var{j}-th variable in @var{y}.
## @tex
## $$
## {\rm corr}(x,y) = {{\rm cov}(x,y) \over {\rm std}(x) \, {\rm std}(y)}
## $$
## @end tex
## @ifnottex
##
## @example
## corr (@var{x},@var{y}) = cov (@var{x},@var{y}) / (std (@var{x}) * std (@var{y}))
## @end example
##
## @end ifnottex
## If called with one argument, compute @code{corr (@var{x}, @var{x})},
## the correlation between the columns of @var{x}.
##
## If @var{x} is a scalar the result is always 1.  Otherwise, if @var{x} and
## @var{y} are both row vectors (a single observation of each variable) the
## correlation is undefined and the result is a
## @code{columns (@var{x})}-by-@code{columns (@var{y})} matrix of @code{NaN}.
## @seealso{cov}
## @end deftypefn

function r = corr (x, y = [])

  if (nargin < 1)
    print_usage ();
  endif

  ## Input validation is done by cov.m.  Don't repeat tests here.

  ## Special case, scalar is always 100% correlated with itself
  if (isscalar (x))
    if (isa (x, "single"))
      r = single (1);
    else
      r = 1;
    endif
    return;
  endif

  ## No check for division by zero error, which happens only when
  ## there is a constant vector and should be rare.
  if (nargin == 2)
    ## Adjust for Octave 9.1.0 compatibility behavior change in two-input cov.
    ## cov now treats cov (x, y) as cov (x(:), y(:)), returning a 2x2
    ## covariance of the two univariate distributions x and y.  corr will now
    ## pass [x, y] as cov ([x, y]), which for m x n inputs will return
    ## 2n x 2n outputs, with the off diagonal matrix quarters containing what
    ## was previously returned by cov (x, y).

    ## FIXME: Returning a larger than needed array and discarding 3/4 of the
    ##        information is non-ideal.  Consider implementing a more
    ##        efficient cov here as a subfunction to corr.

    ncx = columns (x);

    ## Call cov before any special casing so that it still validates x and y
    ## (and so that mismatched sizes still fail in the concatenation).
    c = cov ([x, y]);

    if (isrow (x) && isrow (y))
      ## Single observation per variable: the deviation along dimension 1 is
      ## zero, so the correlation is 0/0.  The concatenated input is a row
      ## vector, for which cov does not return the (ncx+ncy)-square matrix
      ## the slicing below requires, so build the NaN result directly.
      ncy = columns (y);
      if (isa (x, "single") || isa (y, "single"))
        r = NaN (ncx, ncy, "single");
      else
        r = NaN (ncx, ncy);
      endif
    else
      ## Keep only the upper-right quarter: covariance of x columns (rows of
      ## c) against y columns (columns of c).
      c = c(1:ncx, ncx+1:end);

      ## Pin the operating dimension so that std always works down the
      ## observations, matching the layout assumed for cov above.
      s = std (x, [], 1)' * std (y, [], 1);
      r = c ./ s;
    endif
  else
    ## The diagonal of the covariance matrix holds the variances; the outer
    ## product of their square roots is the normalization for every pair.
    c = cov (x);
    s = sqrt (diag (c));
    r = c ./ (s * s');
  endif

endfunction


## One- and two-argument forms must agree and return one row/column per
## variable.
%!test
%! x = rand (10);
%! cc1 = corr (x);
%! cc2 = corr (x, x);
%! assert (size (cc1), [10, 10]);
%! assert (size (cc2), [10, 10]);
%! assert (cc1, cc2, sqrt (eps));

## Perfectly (anti-)correlated column vectors, double precision.
%!test
%! x = [1:3]';
%! y = [3:-1:1]';
%! assert (corr (x, y), -1, 5*eps);
%! assert (corr (x, flipud (y)), 1, 5*eps);
%! assert (corr ([x, y]), [1 -1; -1 1], 5*eps);

## Same data in single precision; tolerance scaled to the single epsilon.
%!test
%! x = single ([1:3]');
%! y = single ([3:-1:1]');
%! assert (corr (x, y), single (-1), 5*eps ("single"));
%! assert (corr (x, flipud (y)), single (1), 5*eps ("single"));
%! assert (corr ([x, y]), single ([1 -1; -1 1]), 5*eps ("single"));

## Scalar input is defined to be perfectly correlated with itself.
%!assert (corr (5), 1)
%!assert (corr (single (5)), single (1))

## Test input validation
%!error <Invalid call> corr ()
%!error corr ([1; 2], ["A", "B"])
%!error corr (ones (2,2,2))
%!error corr (ones (2,2), ones (2,2,2))