Mercurial > octave-dspies

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/util/accumulate.cc	Wed Jun 11 16:59:46 2014 -0600
@@ -0,0 +1,648 @@
+/*
+
+Copyright (C) 2014 David Spies
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+// A template for functions which accumulate all the values along
+// the rows or columns of a matrix into a single row or column vector
+// (more generally removing one dimension of a many-dimensional matrix)
+// Functions which should call this one:
+// min, max, sum, product, any, all
+//
+// TODO: Allow possible second return value for min and max
+#include "dim-vector.h"
+#include "Array.h"
+
+// TODO Support any/all with short-circuiting
+enum fun_type
+{
+  min, max, sum, prod
+};
+
+const bool MIN = false;
+const bool MAX = true;
+
+const int SUM = 0;
+const int PROD = 1;
+
+const int COL_ACCUM = 0;
+const int ROW_ACCUM = 1;
+
+// Pairs a value together with its index along the
+// accumulated dimension
+template<typename T>
+struct Elem
+{
+  const octave_idx_type accum_index;
+  const T value;
+  Elem (octave_idx_type accum_index, T value) :
+      accum_index (accum_index), value (value) { }
+};
+
+//Chooses < or > based on template argument
+//(but only uses < operation)
+template<typename E, bool MinOrMax>
+bool
+exceeds (E v1, E v2) { return (MinOrMax == MIN) ? v1 < v2 : v2 < v1; }
+
+template<typename E, bool MinOrMax>
+struct FullMMAccum
+{
+public:
+
+  Array<Elem<E> > res;
+  Array<double> ind;
+
+  FullMMAccum (const dim_vector& dims) : res (dims), ind (dims, 0) { }
+
+  void
+  add (octave_idx_type to_index, Elem<E> rhs)
+  {
+    if (ind (to_index) == 0 || exceeds<E, MinOrMax> (rhs.value, res (to_index)))
+      {
+        ind (to_index) = rhs.index + 1;
+        res (to_index) = rhs.value;
+      }
+  }
+
+  octave_value_list
+  post_process (void)
+  {
+    return octave_value_list (res, ind);
+  }
+};
+
+// TODO Separate Sum and Prod
+// for short-circuiting Prod
+template<typename E, int SorP>
+struct FullSPAccum
+{
+
+  Array<E> res;
+
+  FullSPAccum (const dim_vector& dims) : res (dims, SorP) { }
+
+  void
+  add (octave_idx_type to_index, Elem<E> rhs)
+  {
+    if (SorP == PROD)
+      res (to_index) *= rhs.value;
+    else
+      res (to_index) += rhs.value;
+  }
+
+  octave_value_list
+  post_process ()
+  {
+    return octave_value_list (res);
+  }
+};
+
+template<typename E, bool MinOrMax>
+struct SparseMMAccum
+{
+private:
+  FullMMAccum<E, MinOrMax> my_accum;
+public:
+  SparseMMAccum (dim_vector dims) : my_accum (dims) { }
+
+  void
+  add (octave_idx_type ind, Elem<E> value)
+  {
+    my_accum.add (ind, value);
+  }
+
+  octave_value_list
+  post_process (void)
+  {
+    return octave_value_list (Sparse<E> (my_accum.res), my_accum.ind);
+  }
+};
+
+// TODO Separate Sum and Prod
+// for short-circuiting Prod
+template<typename E, int SorP>
+struct SparseSPAccum
+{
+private:
+  FullSPAccum<E, SorP> my_accum;
+public:
+  SparseSPAccum (dim_vector dims) : my_accum (dims) { }
+
+  void
+  add (octave_idx_type ind, Elem<E> value)
+  {
+    my_accum.add (ind, value);
+  }
+
+  octave_value_list
+  post_process (void)
+  {
+    return octave_value_list (Sparse<E> (my_accum.res));
+  }
+};
+
+template<typename E>
+octave_value_list
+sparseColumn (octave_idx_type height,
+              std::vector<std::pair<octave_idx_type, E> > vec)
+{
+  Sparse<E> res = Sparse<E> (height, 1, vec.size ());
+  res.xcidx (0) = 0;
+  res.xcidx (1) = vec.size ();
+  for (size_t i = 0; i < vec.size; ++i)
+    {
+      res.xridx (i) = vec[i].first;
+      res.xdata (i) = vec[i].second;
+    }
+  return octave_value_list (res);
+}
+
+template<typename E, bool MinOrMax>
+struct TallMMAccum
+{
+private:
+  const octave_idx_type height;
+  std::vector<std::pair<octave_idx_type, E> > res_vec;
+
+public:
+  TallMMAccum (octave_idx_type height) : height (height) { }
+
+  void
+  add_term (octave_idx_type row, Elem<E> value)
+  {
+    res_vec.push_back (std::pair (row, value.value));
+  }
+
+  void
+  join_term (Elem<E> value)
+  {
+    if (exceeds<E, MinOrMax> (value.value, res_vec.back ().second))
+      {
+        res_vec.back ().second = value.value;
+      }
+  }
+
+  octave_value_list
+  post_process (void)
+  {
+    return sparseColumn (height, res_vec);
+  }
+};
+
+template<typename E, int SorP>
+struct TallSPAccum
+{
+private:
+  const octave_idx_type height;
+  std::vector<std::pair<octave_idx_type, E> > res_vec;
+
+public:
+  TallSPAccum (octave_idx_type height) : height (height) { }
+
+  void
+  add_term (octave_idx_type row, Elem<E> value)
+  {
+    res_vec.push_back (std::pair (row, value.value));
+  }
+
+  void
+  join_term (Elem<E> value)
+  {
+    res_vec.back ().second += value.value;
+  }
+
+  octave_value_list
+  post_process ()
+  {
+    return sparseColumn (height, res_vec);
+  }
+};
+
+// M is the type of matrix. Right now I'm only handling
+// Array or Sparse.  TODO What other types must I handle?
+template<typename M>
+octave_value_list
+accumulate (const M& matrix, int dim, int nargout, fun_type t)
+{
+  if (dim < 0)
+    {
+      // TODO Raise exception: how?
+      assert(false);
+    }
+  else if (dim >= matrix.ndims ())
+    return octave_value_list (matrix);
+  // TODO Add is_sparse to SparseMatrix
+  // Is there another way to check?
+  else if (M::is_sparse)
+    return select_sparse_accumulate<M::element_type> (matrix, dim, t, nargout);
+  else
+    return select_full_accumulate<M::element_type> (matrix, dim, t);
+}
+
+template<typename E>
+octave_value_list
+select_full_accumulate (const Array<E>& matrix, int dim, fun_type t)
+{
+  // TODO Deal with h = 0 and w = 0
+  switch (t)
+    {
+    case min:
+      return full_accumulate<E, FullMMAccum<E, MIN> > (matrix, dim);
+    case max:
+      return full_accumulate<E, FullMMAccum<E, MAX> > (matrix, dim);
+    case sum:
+      return full_accumulate<E, FullSPAccum<E, SUM> > (matrix, dim);
+    case prod:
+      return full_accumulate<E, FullSPAccum<E, PROD> > (matrix, dim);
+    default:
+      assert(false);
+      // TODO Raise exception instead?
+    }
+}
+
+// Change dim to a template parameter
+template<typename E>
+octave_value_list
+select_sparse_accumulate (const Sparse<E>& matrix, int dim, fun_type t,
+                          int nargout)
+{
+  switch (dim)
+    {
+    case ROW_ACCUM:
+      return select_sparse_accumulate<E, ROW_ACCUM> (matrix, t, nargout);
+    case COL_ACCUM:
+      return select_sparse_accumulate<E, COL_ACCUM> (matrix, t, nargout);
+    default:
+      // This was already checked by accumulate
+      assert(false);
+    }
+}
+
+// Accumulating over the columns of a sparse matrix is simple.
+// The rows are a little more complicated
+// There are two possible algorithms for accumulating over
+// the rows of a sparse matrix
+// the simple one runs in O(h+nnz) and just
+// constructs a full result vector (which can
+// be converted to a sparse vector).
+// The tricky one uses a heap to construct a sparse
+// result vector directly and runs in O(w+nnz*log(w)).
+// We need to look at matrix's dimensions to see
+// which one will work faster.  If the matrix
+// is tall and sparse enough, we use the second one.
+
+template<typename E, int dim>
+octave_value_list
+select_sparse_accumulate (const Sparse<E>& matrix, fun_type t, int nargout)
+{
+  octave_idx_type w = matrix.cols ();
+  octave_idx_type h = matrix.rows ();
+  octave_idx_type nnz = matrix.nnz ();
+  //TODO Deal with h = 0 and w = 0
+  // For perfect optimality there should probably be some constants
+  // before each of these terms (determined by extensive profiling).
+  // But for big-O optimality, just making this comparison is good
+  // enough.
+  // Using float log because being precise isn't important.
+  if (nargout == 1 && dim == ROW_ACCUM && w + nnz * log2f ((float) w) < h + nnz)
+    {
+      switch (t)
+        {
+        case min:
+          return sparse_tall_row_accum<E, TallMMAccum<E, MIN> > (matrix);
+        case max:
+          return sparse_tall_row_accum<E, TallMMAccum<E, MAX> > (matrix);
+        case sum:
+          return sparse_tall_row_accum<E, TallSPAccum<E, SUM> > (matrix);
+        case prod:
+          return sparse_tall_row_accum<E, TallSPAccum<E, PROD> > (matrix);
+        }
+    }
+  else
+    {
+      switch (t)
+        {
+        case min:
+          return sparse_normal_accum<E, SparseMMAccum<E, MIN>, dim> (matrix);
+        case max:
+          return sparse_normal_accum<E, SparseMMAccum<E, MAX>, dim> (matrix);
+        case sum:
+          return sparse_normal_accum<E, SparseSPAccum<E, SUM>, dim> (matrix);
+        case prod:
+          return sparse_normal_accum<E, SparseSPAccum<E, PROD>, dim> (matrix);
+        }
+    }
+}
+
+// excluded_indexer takes a dim_vector v and an "excluded dimension" d
+// and iterates over the indices 0 to n (where n = v.numel()) while
+// simultaneously maintaining an iterator over (v excluding d) meaning
+// the index resulting from removing the dth element from v.
+// ex. let v = [3,4,5] and d = 1
+// then we can use an excluded_indexer to iterate over the pairs
+// from_idx to_idx
+//
+// 0 0 (= [0,0,0], [0,0])
+// 1 1 (= [1,0,0], [1,0])
+// 2 2 (= [2,0,0], [2,0])
+// 3 0 (= [0,1,0], [0,0])
+// 4 1 (= [1,1,0], [1,0])
+// 5 2 (= [2,1,0], [2,0])
+// 6 0 (= [0,2,0], [0,0])
+// 7 1 (= [1,2,0], [1,0])
+// 8 2 (= [2,2,0], [2,0])
+// 9 0 (= [0,3,0], [0,0])
+// 10 1 (= [1,3,0], [1,0])
+// 11 2 (= [2,3,0], [2,0])
+// 12 3 (= [0,0,1], [0,1])
+// 13 4 (= [1,0,1], [1,1])
+// 14 5 (= [2,0,1], [2,1])
+// 15 3 (= [0,1,1], [0,1])
+// ...etc...
+// 58 13 (= [1,3,4], [1,3])
+// 59 14 (= [2,3,4], [2,4])
+//
+// This is done extremely efficiently using only incrementing and subtraction
+// (no multiplication,division modulo etc.) and is the most spatial-locality
+// friendly way to iterate over the two matrices.
+
+struct excluded_indexer
+{
+private:
+  const octave_idx_type before_size;
+  const octave_idx_type between_size;
+
+  octave_idx_type from_idx;
+  octave_idx_type to_idx;
+
+  octave_idx_type before_ctr;
+  octave_idx_type between_ctr;
+
+public:
+
+  excluded_indexer (const dim_vector& sizes, int dim) :
+      before_size (product_prefix (sizes, dim)), between_size (sizes (dim)),
+      from_idx (0), to_idx (0), before_ctr (0), between_ctr (0) { }
+
+  // Returns the index for the input (larger) matrix
+  octave_idx_type
+  get_from_idx (void) const
+  {
+    return from_idx;
+  }
+
+  // Returns the index for the output (smaller) matrix
+  octave_idx_type
+  get_to_idx (void) const
+  {
+    return to_idx;
+  }
+
+  octave_idx_type
+  get_accum_idx (void) const
+  {
+    return between_ctr;
+  }
+
+  // Increments both indices and then subtracts from
+  // the output index if necessary
+  void
+  operator++ (void)
+  {
+    ++from_idx;
+    ++to_idx;
+    ++before_ctr;
+    if (before_ctr == before_size)
+      {
+        before_ctr = 0;
+        ++between_ctr;
+        if (between_ctr == between_size)
+          between_ctr = 0;
+        else
+          to_idx -= before_size;
+      }
+  }
+
+  // Compares the input index with from_size
+  bool
+  operator< (octave_idx_type from_size) const
+  {
+    return from_idx < from_size;
+  }
+};
+
+// Accumulate method for full matrices
+// Accumulator takes a dimension in its constructor.  It supports methods
+// void add(octave_idx_type, Elem<E>) and octave_value_list post_process()
+template<typename E, typename Accumulator>
+octave_value_list
+full_accumulate (const Array<E>& matrix, int dim)
+{
+  const dim_vector sizes = matrix.get_dims ();
+  dim_vector res_dims = sizes;
+  res_dims (dim) = 1;
+  // TODO Can I do this? Is it safe to mutate a dim_vector
+  // Does (res_dims = sizes) make a copy or copy the pointer?
+  Accumulator res (res_dims);
+  octave_idx_type from_numel = sizes.numel ();
+  octave_idx_type to_numel = res_dims.numel ();
+  for (excluded_indexer i (sizes, dim); i < from_numel; ++i)
+    {
+      res.add (i.get_to_idx (),
+               Elem<E> (i.get_accum_idx (), matrix.xelem (i.get_from_idx ())));
+    }
+  return res.post_process ();
+}
+
+// Takes the product of the first dim dimensions of sizes
+octave_idx_type
+product_prefix (const dim_vector& sizes, int dim)
+{
+  assert(dim <= sizes.ndims ());
+  octave_idx_type res = 1;
+  for (int i = 0; i < dim; ++i)
+    {
+      res *= sizes (i);
+    }
+  return res;
+}
+
+template<typename E, typename Accumulator, int accum_dir>
+octave_value_list
+sparse_normal_accum (const Sparse<E>& matrix)
+{
+  assert(accum_dir == 1 || accum_dir == 0);
+  dim_vector accum_dims = matrix.dims ();
+  octave_idx_type idx_size = accum_dims (1 - accum_dir);
+  octave_idx_type accum_size = accum_dims (accum_dir);
+  accum_dims (accum_dir) = 1;
+  Accumulator res (accum_dims);
+  std::vector<octave_idx_type> known_mex (idx_size, 0);
+  for (octave_idx_type col = 0; col < matrix.cols (); ++col)
+    {
+      for (octave_idx_type i = matrix.cidx (col); i < matrix.cidx (col + 1);
+          ++i)
+        {
+          octave_idx_type res_idx;
+          octave_idx_type accum_idx;
+          if (accum_dir == COL_ACCUM)
+            {
+              res_idx = col;
+              accum_idx = matrix.ridx (i);
+            }
+          else if (accum_dir == ROW_ACCUM)
+            {
+              res_idx = matrix.ridx (i);
+              accum_idx = col;
+            }
+          else
+            {
+              assert(false);
+            }
+          if (known_mex[res_idx] != (octave_idx_type) (-1))
+            {
+              if (accum_idx == known_mex[res_idx])
+                ++known_mex[res_idx];
+              else
+                {
+                  assert(accum_idx > known_mex[res_idx]);
+                  res.add (res_idx, Elem<E> (known_mex[res_idx], 0));
+                  known_mex[res_idx] = (octave_idx_type) (-1);
+                }
+            }
+          res.add (res_idx, Elem<E> (accum_idx, matrix.data[i]));
+        }
+    }
+  for (octave_idx_type i = 0; i < idx_size; ++i)
+    {
+      if (known_mex[i] != (octave_idx_type) (-1) && known_mex[i] < accum_size)
+        {
+          res.add (i, Elem<E> (known_mex[i], 0));
+        }
+    }
+  return res.post_process ();
+}
+
+// Knows everything about an index-value pair
+// in a sparse matrix (row, col, nnz_idx, value)
+template<typename E>
+struct SparseElem
+{
+  const octave_idx_type row;
+  const octave_idx_type col;
+  const E value;
+  const octave_idx_type idx;
+
+  SparseElem (octave_idx_type row, octave_idx_type col, E value,
+              octave_idx_type idx) :
+      row (row), col (col), value (value), idx (idx) { }
+
+  bool
+  operator< (const SparseElem<E> rhs) const
+  {
+    return row < rhs.row || (row == rhs.row && col < rhs.col);
+  }
+};
+
+template<typename E, typename TallAccumulator>
+octave_value_list
+tall_sparse_row_accum (const Sparse<E>& matrix)
+{
+  TallAccumulator res;
+
+  std::priority_queue<SparseElem<E> > nextItem;
+  for (octave_idx_type col = 0; col < matrix.cols (); ++col)
+    {
+      octave_idx_type idx = matrix.cidx[col];
+      nextItem.push (
+          SparseElem<E> (matrix.ridx[idx], col, matrix.data[idx], idx));
+    }
+
+  octave_idx_type current_row = -1;
+
+  // mex is short for minimum excluded value.
+  // (meaning the smallest nonnegative integer which is not
+  // in a set. accum_mex is the mex of the columns containing
+  // nonzero elements in the current row.
+  // It's uncommon to find a use for the function
+  // outside of combinatoric game theory. In this case,
+  // a row contains a zero if accum_mex < width.
+  octave_idx_type accum_mex;
+
+  // It's either macro or copy-paste.  I chose the lesser
+  // of two evils.  If you don't like it, learn Lisp.
+  // ***
+#define finish_row \
+      if (current_row >= 0 \
+       && accum_mex >= 0 \
+       && accum_mex  < matrix.cols ()) \
+        res.join_term (Elem<E> (accum_mex, 0)); \
+
+  while (!nextItem.empty ())
+    {
+      SparseElem<E> item = nextItem.front ();
+      nextItem.pop ();
+      if (item.row != current_row)
+        {
+          // macro; see *** above
+          finish_row;
+
+          current_row = item.row;
+          if (item.col > 0)
+            {
+              res.add_term (item.row, Elem<E> (0, 0));
+              accum_mex = -1;
+            }
+          else
+            accum_mex = 1;
+          res.add_term (item.row, Elem<E> (item.col, item.value));
+        }
+      else
+        {
+          if (accum_mex != (octave_idx_type) (-1))
+            {
+              if (item.col == accum_mex)
+                ++accum_mex;
+              else
+                {
+                  res.join_term (Elem<E> (accum_mex, 0));
+                  accum_mex = -1;
+                }
+            }
+          res.join_term (Elem<E> (item.col, item.value));
+        }
+
+      octave_idx_type idx = item.idx + 1;
+      octave_idx_type col = item.col;
+      if (idx < matrix.cidx[col + 1])
+        {
+          nextItem.push (
+              SparseElem<E> (matrix.ridx[idx], col, matrix.data[idx], idx));
+        }
+    }
+
+  // macro; see *** above
+  finish_row;
+
+#undef finish_row
+
+  return res.post_process ();
+}