view scripts/general/accumarray.m @ 13153:25effffba9b0

maint: Periodic merge of stable to default
author Jordi Gutiérrez Hermoso <>
date Sat, 17 Sep 2011 22:07:41 -0500
parents cefd568ea073 8d5f0b41e6b0
children 8aaaef4a69aa
line wrap: on
line source

## Copyright (C) 2007-2011 David Bateman
## Copyright (C) 2009-2010 VZLU Prague
## This file is part of Octave.
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {} accumarray (@var{subs}, @var{vals}, @var{sz}, @var{func}, @var{fillval}, @var{issparse})
## @deftypefnx {Function File} {} accumarray (@var{subs}, @var{vals}, @dots{})
## Create an array by accumulating the elements of a vector into the
## positions defined by their subscripts.  The subscripts are defined by
## the rows of the matrix @var{subs} and the values by @var{vals}.  Each
## row of @var{subs} corresponds to one of the values in @var{vals}.  If
## @var{vals} is a scalar, it will be used for each of the rows of
## @var{subs}.
## The size of the matrix will be determined by the subscripts
## themselves. However, if @var{sz} is defined it determines the matrix
## size. The length of @var{sz} must correspond to the number of columns
## in @var{subs}.
## The default action of @code{accumarray} is to sum the elements with
## the same subscripts.  This behavior can be modified by defining the
## @var{func} function.  This should be a function or function handle
## that accepts a column vector and returns a scalar.  The result of the
## function should not depend on the order of the subscripts.
## The elements of the returned array that have no subscripts associated
## with them are set to zero.  Defining @var{fillval} to some other
## value allows these values to be defined.
## By default @code{accumarray} returns a full matrix.  If
## @var{issparse} is logically true, then a sparse matrix is returned
## instead.
## The following @code{accumarray} example constructs a frequency table
## that in the first column counts how many occurrences each number in
## the second column has, taken from the vector @var{x}.  Note the usage
## of @code{unique} for assigning to all repeated elements of @var{x}
## the same index (@pxref{doc-unique}).
## @example
## @group
## x = [91, 92, 90, 92, 90, 89, 91, 89, 90, 100, 100, 100];
## [u, ~, j] = unique (x);
## [accumarray(j', 1), u']
## @result{} 2    89
##    3    90
##    2    91
##    2    92
##    3   100
## @end group
## @end example
## Another example, where the result is a multidimensional 3D array and
## the default value (zero) appears in the output:
## @example
## @group
## accumarray ([1, 1, 1;
##              2, 1, 2;
##              2, 3, 2;
##              2, 1, 2;
##              2, 3, 2], 101:105)
## @result{} ans(:,:,1) = [101, 0, 0; 0, 0, 0]
##    ans(:,:,2) = [0, 0, 0; 206, 0, 208]
## @end group
## @end example
## The complexity in the non-sparse case is generally O(M+N), where N is
## the number of subscripts and M is the maximum subscript (linearized
## in the multi-dimensional case).  If @var{func} is one of @code{@@sum}
## (default), @code{@@max}, @code{@@min} or @code{@@(x) @{x@}}, an
## optimized code path is used.  Note that for a general reduction
## function the interpreter overhead can play a major part and it may be
## more efficient to do multiple accumarray calls and compute the results
## in a vectorized manner.
## @seealso{accumdim, unique}
## @end deftypefn

function A = accumarray (subs, vals, sz = [], func = [], fillval = [], issparse = [])

  ## Accumulate VALS into the positions given by SUBS.
  ##
  ## SUBS is either a matrix with one subscript tuple per row, or a cell
  ## array holding one subscript vector per dimension.  SZ, FUNC, FILLVAL
  ## and ISSPARSE are optional; an empty value selects the default
  ## (size taken from the subscripts, summation, fill value 0, full result).

  if (nargin < 2 || nargin > 6)
    print_usage ();
  endif

  ## Determine the number of subscript dimensions.  Cell input is
  ## normalized to column vectors; a one-element cell is unwrapped so the
  ## rest of the code sees a plain index vector.
  if (iscell (subs))
    subs = cellfun ("vec", subs, "uniformoutput", false);
    ndims = numel (subs);
    if (ndims == 1)
      subs = subs{1};
    endif
  else
    ndims = columns (subs);
  endif

  if (isempty (fillval))
    fillval = 0;
  endif

  if (isempty (issparse))
    issparse = false;
  endif

  if (issparse)

    ## Sparse case. Avoid linearizing the subscripts, because it could overflow.

    if (fillval != 0)
      error ("accumarray: FILLVAL must be zero in the sparse case");
    endif

    ## Ensure subscripts are a two-column matrix.
    if (iscell (subs))
      subs = [subs{:}];
    endif

    ## Validate dimensions.  A single subscript column is treated as row
    ## indices into column 1.
    if (ndims == 1)
      subs(:,2) = 1;
    elseif (ndims != 2)
      error ("accumarray: in the sparse case, needs 1 or 2 subscripts");
    endif

    if (isnumeric (vals) || islogical (vals))
      vals = double (vals);
    else
      error ("accumarray: in the sparse case, values must be numeric or logical");
    endif

    if (! (isempty (func) || func == @sum))

      ## Reduce values. This is not needed if we're about to sum them, because
      ## "sparse" can do that.

      ## Sort indices.
      [subs, idx] = sortrows (subs);
      n = rows (subs);
      ## Identify runs: jdx holds the last row of every group of equal
      ## subscript pairs.
      jdx = find (any (diff (subs, 1, 1), 2));
      jdx = [jdx; n];

      ## Apply FUNC to each group of values and keep one subscript per group.
      vals = cellfun (func, mat2cell (vals(:)(idx), diff ([0; jdx])));
      subs = subs(jdx, :);
      mode = "unique";
    else
      mode = "sum";
    endif

    ## Form the sparse matrix.
    if (isempty (sz))
      A = sparse (subs(:,1), subs(:,2), vals, mode);
    elseif (length (sz) == 2)
      A = sparse (subs(:,1), subs(:,2), vals, sz(1), sz(2), mode);
    else
      error ("accumarray: dimensions mismatch");
    endif

  else

    ## Linearize subscripts.
    if (ndims > 1)
      if (isempty (sz))
        ## No size given: take the largest subscript along each dimension.
        if (iscell (subs))
          sz = cellfun ("max", subs);
        else
          sz = max (subs, [], 1);
        endif
      elseif (ndims != length (sz))
        error ("accumarray: dimensions mismatch");
      endif

      ## Convert multidimensional subscripts.
      if (ismatrix (subs))
        subs = num2cell (subs, 1);
      endif
      subs = sub2ind (sz, subs{:}); # creates index cache
    elseif (! isempty (sz) && length (sz) < 2)
      error ("accumarray: needs at least 2 dimensions");
    elseif (! isindex (subs)) # creates index cache
      error ("accumarray: indices must be positive integers");
    endif

    ## Some built-in reductions handled efficiently.

    if (isempty (func) || func == @sum)
      ## Fast summation.
      if (isempty (sz))
        A = __accumarray_sum__ (subs, vals);
      else
        A = __accumarray_sum__ (subs, vals, prod (sz));
        ## set proper shape.
        A = reshape (A, sz);
      endif

      ## we fill in nonzero fill value.
      if (fillval != 0)
        mask = true (size (A));
        mask(subs) = false;
        A(mask) = fillval;
      endif
    elseif (func == @max)
      ## Fast maximization.

      ## Pick the initial value handed to the built-in reduction.
      if (isinteger (vals))
        zero = intmin (class (vals));
      elseif (islogical (vals))
        zero = false;
      elseif (fillval == 0 && all (vals(:) >= 0))
        ## This is a common case - fillval is zero, all numbers nonnegative.
        zero = 0;
      else
        zero = NaN; # Neutral value.
      endif

      if (isempty (sz))
        A = __accumarray_max__ (subs, vals, zero);
      else
        A = __accumarray_max__ (subs, vals, zero, prod (sz));
        A = reshape (A, sz);
      endif

      ## Overwrite untouched positions only when the initial value differs
      ## from FILLVAL and neither of the two is NaN.
      if (fillval != zero && ! (isnan (fillval) || isnan (zero)))
        mask = true (size (A));
        mask(subs) = false;
        A(mask) = fillval;
      endif
    elseif (func == @min)
      ## Fast minimization.

      ## Pick the initial value handed to the built-in reduction.
      if (isinteger (vals))
        zero = intmax (class (vals));
      elseif (islogical (vals))
        zero = true;
      else
        zero = NaN; # Neutral value.
      endif

      if (isempty (sz))
        A = __accumarray_min__ (subs, vals, zero);
      else
        A = __accumarray_min__ (subs, vals, zero, prod (sz));
        A = reshape (A, sz);
      endif

      ## Overwrite untouched positions only when the initial value differs
      ## from FILLVAL and neither of the two is NaN.
      if (fillval != zero && ! (isnan (fillval) || isnan (zero)))
        mask = true (size (A));
        mask(subs) = false;
        A(mask) = fillval;
      endif
    else

      ## The general case. Reduce values.
      n = rows (subs);
      if (numel (vals) == 1)
        ## Replicate a scalar value once per subscript.
        vals = vals(ones (1, n), 1);
      else
        vals = vals(:);
      endif

      ## Sort indices.
      [subs, idx] = sort (subs);
      ## Identify runs: jdx holds the last position of every group of
      ## equal linear indices.
      jdx = find (subs(1:n-1) != subs(2:n));
      jdx = [jdx; n];
      vals = mat2cell (vals(idx), diff ([0; jdx]));
      ## Optimize the case when function is @(x) {x}, i.e. we just want to
      ## collect the values to cells.
      persistent simple_cell_str = func2str (@(x) {x});
      if (! strcmp (func2str (func), simple_cell_str))
        vals = cellfun (func, vals);
      endif
      subs = subs(jdx);

      ## Construct matrix of fillvals.
      ## NOTE(review): SZ is still empty here when a single subscript
      ## column was given without SZ; the container then starts from an
      ## empty size and is grown by the indexed assignment below - confirm
      ## the resulting orientation is the intended one.
      if (iscell (vals))
        A = cell (sz);
      elseif (fillval == 0)
        A = zeros (sz, class (vals));
      else
        A = repmat (fillval, sz);
      endif

      ## Set the reduced values.
      A(subs) = vals;
    endif
  endif
endfunction

## Input validation.
%!error (accumarray (1:5))
%!error (accumarray ([1,2,3],1:2))

## Default summation, custom reductions, fill values and sparse output.
%!assert (accumarray ([1;2;4;2;4],101:105), [101;206;0;208])
%!assert (accumarray ([1,1,1;2,1,2;2,3,2;2,1,2;2,3,2],101:105),cat(3, [101,0,0;0,0,0],[0,0,0;206,0,208]))
%!assert (accumarray ([1,1,1;2,1,2;2,3,2;2,1,2;2,3,2],101:105,[],@(x)sin(sum(x))),sin(cat(3, [101,0,0;0,0,0],[0,0,0;206,0,208])))
%!assert (accumarray ({[1 3 3 2 3 1 2 2 3 3 1 2],[3 4 2 1 4 3 4 2 2 4 3 4],[1 1 2 2 1 1 2 1 1 1 2 2]},101:112),cat(3,[0,0,207,0;0,108,0,0;0,109,0,317],[0,0,111,0;104,0,0,219;0,103,0,0]))
%!assert (accumarray ([1,1;2,1;2,3;2,1;2,3],101:105,[2,4],@max,NaN),[101,NaN,NaN,NaN;104,NaN,105,NaN])
%!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2 4],@prod,0,true),sparse([1,2,2],[1,1,3],[101,10608,10815],2,4))
%!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],1,[2,4]), [1,0,0,0;2,0,2,0])
%!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x)length(x)>1),[false,false,false,false;true,false,true,false])

## Collecting values into cells with @(x) {x}.
%!test
%! A = accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x){x});
%! assert (A{2},[102;104])

## The @max fast path must agree with the general reduction path.
%!test
%! subs = ceil (rand (2000, 3)*10);
%! vals = rand (2000, 1);
%! assert (accumarray (subs, vals, [], @max), accumarray (subs, vals, [], @(x) max (x)));

## The @min fast path with a NaN fill value must agree with the general path.
%!test
%! subs = ceil (rand (2000, 1)*100);
%! vals = rand (2000, 1);
%! assert (accumarray (subs, vals, [100, 1], @min, NaN), accumarray (subs, vals, [100, 1], @(x) min (x), NaN));

## Cell and matrix subscripts must give the same sparse result.
%!test
%! subs = ceil (rand (2000, 2)*30);
%! subsc = num2cell (subs, 1);
%! vals = rand (2000, 1);
%! assert (accumarray (subsc, vals, [], [], 0, true), accumarray (subs, vals, [], [], 0, true));

## Cell and matrix subscripts must give the same full @max result.
%!test
%! subs = ceil (rand (2000, 3)*10);
%! subsc = num2cell (subs, 1);
%! vals = rand (2000, 1);
%! assert (accumarray (subsc, vals, [], @max), accumarray (subs, vals, [], @max));