Mercurial > octave-dspies

/*

Copyright (C) 2014 David Spies

This file is part of Octave.

Octave is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

Octave is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with Octave; see the file COPYING.  If not, see
<http://www.gnu.org/licenses/>.

*/

// A template for functions which accumulate all the values along
// the rows or columns of a matrix into a single row or column vector
// (more generally removing one dimension of a many-dimensional matrix)
// Functions which should call this one:
// min, max, sum, product, any, all
//
// TODO: Allow possible second return value for min and max
#include "dim-vector.h"
#include "Array.h"

// TODO Support any/all with short-circuiting
enum fun_type
{
  min, max, sum, prod
};

const bool MIN = false;
const bool MAX = true;

const int SUM = 0;
const int PROD = 1;

const int COL_ACCUM = 0;
const int ROW_ACCUM = 1;

// Pairs a value together with its index along the
// accumulated dimension
template<typename T>
struct Elem
{
  const octave_idx_type accum_index;
  const T value;
  Elem (octave_idx_type accum_index, T value) :
      accum_index (accum_index), value (value) { }
};

//Chooses < or > based on template argument
//(but only uses < operation)
template<typename E, bool MinOrMax>
bool
exceeds (E v1, E v2) { return (MinOrMax == MIN) ? v1 < v2 : v2 < v1; }

template<typename E, bool MinOrMax>
struct FullMMAccum
{
public:

  Array<Elem<E> > res;
  Array<double> ind;

  FullMMAccum (const dim_vector& dims) : res (dims), ind (dims, 0) { }

  void
  add (octave_idx_type to_index, Elem<E> rhs)
  {
    if (ind (to_index) == 0 || exceeds<E, MinOrMax> (rhs.value, res (to_index)))
      {
        ind (to_index) = rhs.index + 1;
        res (to_index) = rhs.value;
      }
  }

  octave_value_list
  post_process (void)
  {
    return octave_value_list (res, ind);
  }
};

// TODO Separate Sum and Prod
// for short-circuiting Prod
template<typename E, int SorP>
struct FullSPAccum
{

  Array<E> res;

  FullSPAccum (const dim_vector& dims) : res (dims, SorP) { }

  void
  add (octave_idx_type to_index, Elem<E> rhs)
  {
    if (SorP == PROD)
      res (to_index) *= rhs.value;
    else
      res (to_index) += rhs.value;
  }

  octave_value_list
  post_process ()
  {
    return octave_value_list (res);
  }
};

template<typename E, bool MinOrMax>
struct SparseMMAccum
{
private:
  FullMMAccum<E, MinOrMax> my_accum;
public:
  SparseMMAccum (dim_vector dims) : my_accum (dims) { }

  void
  add (octave_idx_type ind, Elem<E> value)
  {
    my_accum.add (ind, value);
  }

  octave_value_list
  post_process (void)
  {
    return octave_value_list (Sparse<E> (my_accum.res), my_accum.ind);
  }
};

// TODO Separate Sum and Prod
// for short-circuiting Prod
template<typename E, int SorP>
struct SparseSPAccum
{
private:
  FullSPAccum<E, SorP> my_accum;
public:
  SparseSPAccum (dim_vector dims) : my_accum (dims) { }

  void
  add (octave_idx_type ind, Elem<E> value)
  {
    my_accum.add (ind, value);
  }

  octave_value_list
  post_process (void)
  {
    return octave_value_list (Sparse<E> (my_accum.res));
  }
};

template<typename E>
octave_value_list
sparseColumn (octave_idx_type height,
              std::vector<std::pair<octave_idx_type, E> > vec)
{
  Sparse<E> res = Sparse<E> (height, 1, vec.size ());
  res.xcidx (0) = 0;
  res.xcidx (1) = vec.size ();
  for (size_t i = 0; i < vec.size; ++i)
    {
      res.xridx (i) = vec[i].first;
      res.xdata (i) = vec[i].second;
    }
  return octave_value_list (res);
}

template<typename E, bool MinOrMax>
struct TallMMAccum
{
private:
  const octave_idx_type height;
  std::vector<std::pair<octave_idx_type, E> > res_vec;

public:
  TallMMAccum (octave_idx_type height) : height (height) { }

  void
  add_term (octave_idx_type row, Elem<E> value)
  {
    res_vec.push_back (std::pair (row, value.value));
  }

  void
  join_term (Elem<E> value)
  {
    if (exceeds<E, MinOrMax> (value.value, res_vec.back ().second))
      {
        res_vec.back ().second = value.value;
      }
  }

  octave_value_list
  post_process (void)
  {
    return sparseColumn (height, res_vec);
  }
};

template<typename E, int SorP>
struct TallSPAccum
{
private:
  const octave_idx_type height;
  std::vector<std::pair<octave_idx_type, E> > res_vec;

public:
  TallSPAccum (octave_idx_type height) : height (height) { }

  void
  add_term (octave_idx_type row, Elem<E> value)
  {
    res_vec.push_back (std::pair (row, value.value));
  }

  void
  join_term (Elem<E> value)
  {
    res_vec.back ().second += value.value;
  }

  octave_value_list
  post_process ()
  {
    return sparseColumn (height, res_vec);
  }
};

// M is the type of matrix. Right now I'm only handling
// Array or Sparse.  TODO What other types must I handle?
template<typename M>
octave_value_list
accumulate (const M& matrix, int dim, int nargout, fun_type t)
{
  if (dim < 0)
    {
      // TODO Raise exception: how?
      assert(false);
    }
  else if (dim >= matrix.ndims ())
    return octave_value_list (matrix);
  // TODO Add is_sparse to SparseMatrix
  // Is there another way to check?
  else if (M::is_sparse)
    return select_sparse_accumulate<M::element_type> (matrix, dim, t, nargout);
  else
    return select_full_accumulate<M::element_type> (matrix, dim, t);
}

template<typename E>
octave_value_list
select_full_accumulate (const Array<E>& matrix, int dim, fun_type t)
{
  // TODO Deal with h = 0 and w = 0
  switch (t)
    {
    case min:
      return full_accumulate<E, FullMMAccum<E, MIN> > (matrix, dim);
    case max:
      return full_accumulate<E, FullMMAccum<E, MAX> > (matrix, dim);
    case sum:
      return full_accumulate<E, FullSPAccum<E, SUM> > (matrix, dim);
    case prod:
      return full_accumulate<E, FullSPAccum<E, PROD> > (matrix, dim);
    default:
      assert(false);
      // TODO Raise exception instead?
    }
}

// Change dim to a template parameter
template<typename E>
octave_value_list
select_sparse_accumulate (const Sparse<E>& matrix, int dim, fun_type t,
                          int nargout)
{
  switch (dim)
    {
    case ROW_ACCUM:
      return select_sparse_accumulate<E, ROW_ACCUM> (matrix, t, nargout);
    case COL_ACCUM:
      return select_sparse_accumulate<E, COL_ACCUM> (matrix, t, nargout);
    default:
      // This was already checked by accumulate
      assert(false);
    }
}

// Accumulating over the columns of a sparse matrix is simple.
// The rows are a little more complicated
// There are two possible algorithms for accumulating over
// the rows of a sparse matrix
// the simple one runs in O(h+nnz) and just
// constructs a full result vector (which can
// be converted to a sparse vector).
// The tricky one uses a heap to construct a sparse
// result vector directly and runs in O(w+nnz*log(w)).
// We need to look at matrix's dimensions to see
// which one will work faster.  If the matrix
// is tall and sparse enough, we use the second one.

template<typename E, int dim>
octave_value_list
select_sparse_accumulate (const Sparse<E>& matrix, fun_type t, int nargout)
{
  octave_idx_type w = matrix.cols ();
  octave_idx_type h = matrix.rows ();
  octave_idx_type nnz = matrix.nnz ();
  //TODO Deal with h = 0 and w = 0
  // For perfect optimality there should probably be some constants
  // before each of these terms (determined by extensive profiling).
  // But for big-O optimality, just making this comparison is good
  // enough.
  // Using float log because being precise isn't important.
  if (nargout == 1 && dim == ROW_ACCUM && w + nnz * log2f ((float) w) < h + nnz)
    {
      switch (t)
        {
        case min:
          return sparse_tall_row_accum<E, TallMMAccum<E, MIN> > (matrix);
        case max:
          return sparse_tall_row_accum<E, TallMMAccum<E, MAX> > (matrix);
        case sum:
          return sparse_tall_row_accum<E, TallSPAccum<E, SUM> > (matrix);
        case prod:
          return sparse_tall_row_accum<E, TallSPAccum<E, PROD> > (matrix);
        }
    }
  else
    {
      switch (t)
        {
        case min:
          return sparse_normal_accum<E, SparseMMAccum<E, MIN>, dim> (matrix);
        case max:
          return sparse_normal_accum<E, SparseMMAccum<E, MAX>, dim> (matrix);
        case sum:
          return sparse_normal_accum<E, SparseSPAccum<E, SUM>, dim> (matrix);
        case prod:
          return sparse_normal_accum<E, SparseSPAccum<E, PROD>, dim> (matrix);
        }
    }
}

// excluded_indexer takes a dim_vector v and an "excluded dimension" d
// and iterates over the indices 0 to n (where n = v.numel()) while
// simultaneously maintaining an iterator over (v excluding d) meaning
// the index resulting from removing the dth element from v.
// ex. let v = [3,4,5] and d = 1
// then we can use an excluded_indexer to iterate over the pairs
// from_idx to_idx
//
// 0 0 (= [0,0,0], [0,0])
// 1 1 (= [1,0,0], [1,0])
// 2 2 (= [2,0,0], [2,0])
// 3 0 (= [0,1,0], [0,0])
// 4 1 (= [1,1,0], [1,0])
// 5 2 (= [2,1,0], [2,0])
// 6 0 (= [0,2,0], [0,0])
// 7 1 (= [1,2,0], [1,0])
// 8 2 (= [2,2,0], [2,0])
// 9 0 (= [0,3,0], [0,0])
// 10 1 (= [1,3,0], [1,0])
// 11 2 (= [2,3,0], [2,0])
// 12 3 (= [0,0,1], [0,1])
// 13 4 (= [1,0,1], [1,1])
// 14 5 (= [2,0,1], [2,1])
// 15 3 (= [0,1,1], [0,1])
// ...etc...
// 58 13 (= [1,3,4], [1,3])
// 59 14 (= [2,3,4], [2,4])
//
// This is done extremely efficiently using only incrementing and subtraction
// (no multiplication,division modulo etc.) and is the most spatial-locality
// friendly way to iterate over the two matrices.

struct excluded_indexer
{
private:
  const octave_idx_type before_size;
  const octave_idx_type between_size;

  octave_idx_type from_idx;
  octave_idx_type to_idx;

  octave_idx_type before_ctr;
  octave_idx_type between_ctr;

public:

  excluded_indexer (const dim_vector& sizes, int dim) :
      before_size (product_prefix (sizes, dim)), between_size (sizes (dim)),
      from_idx (0), to_idx (0), before_ctr (0), between_ctr (0) { }

  // Returns the index for the input (larger) matrix
  octave_idx_type
  get_from_idx (void) const
  {
    return from_idx;
  }

  // Returns the index for the output (smaller) matrix
  octave_idx_type
  get_to_idx (void) const
  {
    return to_idx;
  }

  octave_idx_type
  get_accum_idx (void) const
  {
    return between_ctr;
  }

  // Increments both indices and then subtracts from
  // the output index if necessary
  void
  operator++ (void)
  {
    ++from_idx;
    ++to_idx;
    ++before_ctr;
    if (before_ctr == before_size)
      {
        before_ctr = 0;
        ++between_ctr;
        if (between_ctr == between_size)
          between_ctr = 0;
        else
          to_idx -= before_size;
      }
  }

  // Compares the input index with from_size
  bool
  operator< (octave_idx_type from_size) const
  {
    return from_idx < from_size;
  }
};

// Accumulate method for full matrices
// Accumulator takes a dimension in its constructor.  It supports methods
// void add(octave_idx_type, Elem<E>) and octave_value_list post_process()
template<typename E, typename Accumulator>
octave_value_list
full_accumulate (const Array<E>& matrix, int dim)
{
  const dim_vector sizes = matrix.get_dims ();
  dim_vector res_dims = sizes;
  res_dims (dim) = 1;
  // TODO Can I do this? Is it safe to mutate a dim_vector
  // Does (res_dims = sizes) make a copy or copy the pointer?
  Accumulator res (res_dims);
  octave_idx_type from_numel = sizes.numel ();
  octave_idx_type to_numel = res_dims.numel ();
  for (excluded_indexer i (sizes, dim); i < from_numel; ++i)
    {
      res.add (i.get_to_idx (),
               Elem<E> (i.get_accum_idx (), matrix.xelem (i.get_from_idx ())));
    }
  return res.post_process ();
}

// Takes the product of the first dim dimensions of sizes
octave_idx_type
product_prefix (const dim_vector& sizes, int dim)
{
  assert(dim <= sizes.ndims ());
  octave_idx_type res = 1;
  for (int i = 0; i < dim; ++i)
    {
      res *= sizes (i);
    }
  return res;
}

template<typename E, typename Accumulator, int accum_dir>
octave_value_list
sparse_normal_accum (const Sparse<E>& matrix)
{
  assert(accum_dir == 1 || accum_dir == 0);
  dim_vector accum_dims = matrix.dims ();
  octave_idx_type idx_size = accum_dims (1 - accum_dir);
  octave_idx_type accum_size = accum_dims (accum_dir);
  accum_dims (accum_dir) = 1;
  Accumulator res (accum_dims);
  std::vector<octave_idx_type> known_mex (idx_size, 0);
  for (octave_idx_type col = 0; col < matrix.cols (); ++col)
    {
      for (octave_idx_type i = matrix.cidx (col); i < matrix.cidx (col + 1);
          ++i)
        {
          octave_idx_type res_idx;
          octave_idx_type accum_idx;
          if (accum_dir == COL_ACCUM)
            {
              res_idx = col;
              accum_idx = matrix.ridx (i);
            }
          else if (accum_dir == ROW_ACCUM)
            {
              res_idx = matrix.ridx (i);
              accum_idx = col;
            }
          else
            {
              assert(false);
            }
          if (known_mex[res_idx] != (octave_idx_type) (-1))
            {
              if (accum_idx == known_mex[res_idx])
                ++known_mex[res_idx];
              else
                {
                  assert(accum_idx > known_mex[res_idx]);
                  res.add (res_idx, Elem<E> (known_mex[res_idx], 0));
                  known_mex[res_idx] = (octave_idx_type) (-1);
                }
            }
          res.add (res_idx, Elem<E> (accum_idx, matrix.data[i]));
        }
    }
  for (octave_idx_type i = 0; i < idx_size; ++i)
    {
      if (known_mex[i] != (octave_idx_type) (-1) && known_mex[i] < accum_size)
        {
          res.add (i, Elem<E> (known_mex[i], 0));
        }
    }
  return res.post_process ();
}

// Knows everything about an index-value pair
// in a sparse matrix (row, col, nnz_idx, value)
template<typename E>
struct SparseElem
{
  const octave_idx_type row;
  const octave_idx_type col;
  const E value;
  const octave_idx_type idx;

  SparseElem (octave_idx_type row, octave_idx_type col, E value,
              octave_idx_type idx) :
      row (row), col (col), value (value), idx (idx) { }

  bool
  operator< (const SparseElem<E> rhs) const
  {
    return row < rhs.row || (row == rhs.row && col < rhs.col);
  }
};

template<typename E, typename TallAccumulator>
octave_value_list
tall_sparse_row_accum (const Sparse<E>& matrix)
{
  TallAccumulator res;

  std::priority_queue<SparseElem<E> > nextItem;
  for (octave_idx_type col = 0; col < matrix.cols (); ++col)
    {
      octave_idx_type idx = matrix.cidx[col];
      nextItem.push (
          SparseElem<E> (matrix.ridx[idx], col, matrix.data[idx], idx));
    }

  octave_idx_type current_row = -1;

  // mex is short for minimum excluded value.
  // (meaning the smallest nonnegative integer which is not
  // in a set. accum_mex is the mex of the columns containing
  // nonzero elements in the current row.
  // It's uncommon to find a use for the function
  // outside of combinatoric game theory. In this case,
  // a row contains a zero if accum_mex < width.
  octave_idx_type accum_mex;

  // It's either macro or copy-paste.  I chose the lesser
  // of two evils.  If you don't like it, learn Lisp.
  // ***
#define finish_row \
      if (current_row >= 0 \
       && accum_mex >= 0 \
       && accum_mex  < matrix.cols ()) \
        res.join_term (Elem<E> (accum_mex, 0)); \

  while (!nextItem.empty ())
    {
      SparseElem<E> item = nextItem.front ();
      nextItem.pop ();
      if (item.row != current_row)
        {
          // macro; see *** above
          finish_row;

          current_row = item.row;
          if (item.col > 0)
            {
              res.add_term (item.row, Elem<E> (0, 0));
              accum_mex = -1;
            }
          else
            accum_mex = 1;
          res.add_term (item.row, Elem<E> (item.col, item.value));
        }
      else
        {
          if (accum_mex != (octave_idx_type) (-1))
            {
              if (item.col == accum_mex)
                ++accum_mex;
              else
                {
                  res.join_term (Elem<E> (accum_mex, 0));
                  accum_mex = -1;
                }
            }
          res.join_term (Elem<E> (item.col, item.value));
        }

      octave_idx_type idx = item.idx + 1;
      octave_idx_type col = item.col;
      if (idx < matrix.cidx[col + 1])
        {
          nextItem.push (
              SparseElem<E> (matrix.ridx[idx], col, matrix.data[idx], idx));
        }
    }

  // macro; see *** above
  finish_row;

#undef finish_row

  return res.post_process ();
}
author	David Spies <dnspies@gmail.com>
date	Wed, 11 Jun 2014 16:59:46 -0600
parents
children