# HG changeset patch # User David Spies # Date 1402527586 21600 # Node ID 4f0d0b212fd6b75791a7d254d2354184dc66657f # Parent 2e0613dadfee107ae067f873175d82bbc9155c2e WIP diff -r 2e0613dadfee -r 4f0d0b212fd6 liboctave/util/accumulate.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/util/accumulate.cc Wed Jun 11 16:59:46 2014 -0600 @@ -0,0 +1,648 @@ +/* + +Copyright (C) 2014 David Spies + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +// A template for functions which accumulate all the values along +// the rows or columns of a matrix into a single row or column vector +// (more generally removing one dimension of a many-dimensional matrix) +// Functions which should call this one: +// min, max, sum, product, any, all +// +// TODO: Allow possible second return value for min and max +#include "dim-vector.h" +#include "Array.h" + +// TODO Support any/all with short-circuiting +enum fun_type +{ + min, max, sum, prod +}; + +const bool MIN = false; +const bool MAX = true; + +const int SUM = 0; +const int PROD = 1; + +const int COL_ACCUM = 0; +const int ROW_ACCUM = 1; + +// Pairs a value together with its index along the +// accumulated dimension +template +struct Elem +{ + const octave_idx_type accum_index; + const T value; + Elem (octave_idx_type accum_index, T value) : + accum_index (accum_index), value (value) { } +}; + +//Chooses < or > based on template argument +//(but only uses < operation) +template +bool +exceeds (E v1, E v2) { return (MinOrMax == MIN) ? v1 < v2 : v2 < v1; } + +template +struct FullMMAccum +{ +public: + + Array > res; + Array ind; + + FullMMAccum (const dim_vector& dims) : res (dims), ind (dims, 0) { } + + void + add (octave_idx_type to_index, Elem rhs) + { + if (ind (to_index) == 0 || exceeds (rhs.value, res (to_index))) + { + ind (to_index) = rhs.index + 1; + res (to_index) = rhs.value; + } + } + + octave_value_list + post_process (void) + { + return octave_value_list (res, ind); + } +}; + +// TODO Separate Sum and Prod +// for short-circuiting Prod +template +struct FullSPAccum +{ + + Array res; + + FullSPAccum (const dim_vector& dims) : res (dims, SorP) { } + + void + add (octave_idx_type to_index, Elem rhs) + { + if (SorP == PROD) + res (to_index) *= rhs.value; + else + res (to_index) += rhs.value; + } + + octave_value_list + post_process () + { + return octave_value_list (res); + } +}; + +template +struct SparseMMAccum +{ +private: + FullMMAccum my_accum; +public: + SparseMMAccum (dim_vector dims) : my_accum (dims) { } + + void + add (octave_idx_type ind, Elem value) + { + my_accum.add (ind, value); + } + + octave_value_list + post_process (void) + { + return octave_value_list (Sparse (my_accum.res), my_accum.ind); + } +}; + +// TODO Separate Sum and Prod +// for short-circuiting Prod +template +struct SparseSPAccum +{ +private: + FullSPAccum my_accum; +public: + SparseSPAccum (dim_vector dims) : my_accum (dims) { } + + void + add (octave_idx_type ind, Elem value) + { + my_accum.add (ind, value); + } + + octave_value_list + post_process (void) + { + return octave_value_list (Sparse (my_accum.res)); + } +}; + +template +octave_value_list +sparseColumn (octave_idx_type height, + std::vector > vec) +{ + Sparse res = Sparse (height, 1, vec.size ()); + res.xcidx (0) = 0; + res.xcidx (1) = vec.size (); + for (size_t i = 0; i < vec.size; ++i) + { + res.xridx (i) = vec[i].first; + res.xdata (i) = vec[i].second; + } + return octave_value_list (res); +} + +template +struct TallMMAccum +{ +private: + const octave_idx_type height; + std::vector > res_vec; + +public: + TallMMAccum (octave_idx_type height) : height (height) { } + + void + add_term (octave_idx_type row, Elem value) + { + res_vec.push_back (std::pair (row, value.value)); + } + + void + join_term (Elem value) + { + if (exceeds (value.value, res_vec.back ().second)) + { + res_vec.back ().second = value.value; + } + } + + octave_value_list + post_process (void) + { + return sparseColumn (height, res_vec); + } +}; + +template +struct TallSPAccum +{ +private: + const octave_idx_type height; + std::vector > res_vec; + +public: + TallSPAccum (octave_idx_type height) : height (height) { } + + void + add_term (octave_idx_type row, Elem value) + { + res_vec.push_back (std::pair (row, value.value)); + } + + void + join_term (Elem value) + { + res_vec.back ().second += value.value; + } + + octave_value_list + post_process () + { + return sparseColumn (height, res_vec); + } +}; + +// M is the type of matrix. Right now I'm only handling +// Array or Sparse. TODO What other types must I handle? +template +octave_value_list +accumulate (const M& matrix, int dim, int nargout, fun_type t) +{ + if (dim < 0) + { + // TODO Raise exception: how? + assert(false); + } + else if (dim >= matrix.ndims ()) + return octave_value_list (matrix); + // TODO Add is_sparse to SparseMatrix + // Is there another way to check? + else if (M::is_sparse) + return select_sparse_accumulate (matrix, dim, t, nargout); + else + return select_full_accumulate (matrix, dim, t); +} + +template +octave_value_list +select_full_accumulate (const Array& matrix, int dim, fun_type t) +{ + // TODO Deal with h = 0 and w = 0 + switch (t) + { + case min: + return full_accumulate > (matrix, dim); + case max: + return full_accumulate > (matrix, dim); + case sum: + return full_accumulate > (matrix, dim); + case prod: + return full_accumulate > (matrix, dim); + default: + assert(false); + // TODO Raise exception instead? + } +} + +// Change dim to a template parameter +template +octave_value_list +select_sparse_accumulate (const Sparse& matrix, int dim, fun_type t, + int nargout) +{ + switch (dim) + { + case ROW_ACCUM: + return select_sparse_accumulate (matrix, t, nargout); + case COL_ACCUM: + return select_sparse_accumulate (matrix, t, nargout); + default: + // This was already checked by accumulate + assert(false); + } +} + +// Accumulating over the columns of a sparse matrix is simple. +// The rows are a little more complicated +// There are two possible algorithms for accumulating over +// the rows of a sparse matrix +// the simple one runs in O(h+nnz) and just +// constructs a full result vector (which can +// be converted to a sparse vector). +// The tricky one uses a heap to construct a sparse +// result vector directly and runs in O(w+nnz*log(w)). +// We need to look at matrix's dimensions to see +// which one will work faster. If the matrix +// is tall and sparse enough, we use the second one. + +template +octave_value_list +select_sparse_accumulate (const Sparse& matrix, fun_type t, int nargout) +{ + octave_idx_type w = matrix.cols (); + octave_idx_type h = matrix.rows (); + octave_idx_type nnz = matrix.nnz (); + //TODO Deal with h = 0 and w = 0 + // For perfect optimality there should probably be some constants + // before each of these terms (determined by extensive profiling). + // But for big-O optimality, just making this comparison is good + // enough. + // Using float log because being precise isn't important. + if (nargout == 1 && dim == ROW_ACCUM && w + nnz * log2f ((float) w) < h + nnz) + { + switch (t) + { + case min: + return sparse_tall_row_accum > (matrix); + case max: + return sparse_tall_row_accum > (matrix); + case sum: + return sparse_tall_row_accum > (matrix); + case prod: + return sparse_tall_row_accum > (matrix); + } + } + else + { + switch (t) + { + case min: + return sparse_normal_accum, dim> (matrix); + case max: + return sparse_normal_accum, dim> (matrix); + case sum: + return sparse_normal_accum, dim> (matrix); + case prod: + return sparse_normal_accum, dim> (matrix); + } + } +} + +// excluded_indexer takes a dim_vector v and an "excluded dimension" d +// and iterates over the indices 0 to n (where n = v.numel()) while +// simultaneously maintaining an iterator over (v excluding d) meaning +// the index resulting from removing the dth element from v. +// ex. let v = [3,4,5] and d = 1 +// then we can use an excluded_indexer to iterate over the pairs +// from_idx to_idx +// +// 0 0 (= [0,0,0], [0,0]) +// 1 1 (= [1,0,0], [1,0]) +// 2 2 (= [2,0,0], [2,0]) +// 3 0 (= [0,1,0], [0,0]) +// 4 1 (= [1,1,0], [1,0]) +// 5 2 (= [2,1,0], [2,0]) +// 6 0 (= [0,2,0], [0,0]) +// 7 1 (= [1,2,0], [1,0]) +// 8 2 (= [2,2,0], [2,0]) +// 9 0 (= [0,3,0], [0,0]) +// 10 1 (= [1,3,0], [1,0]) +// 11 2 (= [2,3,0], [2,0]) +// 12 3 (= [0,0,1], [0,1]) +// 13 4 (= [1,0,1], [1,1]) +// 14 5 (= [2,0,1], [2,1]) +// 15 3 (= [0,1,1], [0,1]) +// ...etc... +// 58 13 (= [1,3,4], [1,3]) +// 59 14 (= [2,3,4], [2,4]) +// +// This is done extremely efficiently using only incrementing and subtraction +// (no multiplication,division modulo etc.) and is the most spatial-locality +// friendly way to iterate over the two matrices. + +struct excluded_indexer +{ +private: + const octave_idx_type before_size; + const octave_idx_type between_size; + + octave_idx_type from_idx; + octave_idx_type to_idx; + + octave_idx_type before_ctr; + octave_idx_type between_ctr; + +public: + + excluded_indexer (const dim_vector& sizes, int dim) : + before_size (product_prefix (sizes, dim)), between_size (sizes (dim)), + from_idx (0), to_idx (0), before_ctr (0), between_ctr (0) { } + + // Returns the index for the input (larger) matrix + octave_idx_type + get_from_idx (void) const + { + return from_idx; + } + + // Returns the index for the output (smaller) matrix + octave_idx_type + get_to_idx (void) const + { + return to_idx; + } + + octave_idx_type + get_accum_idx (void) const + { + return between_ctr; + } + + // Increments both indices and then subtracts from + // the output index if necessary + void + operator++ (void) + { + ++from_idx; + ++to_idx; + ++before_ctr; + if (before_ctr == before_size) + { + before_ctr = 0; + ++between_ctr; + if (between_ctr == between_size) + between_ctr = 0; + else + to_idx -= before_size; + } + } + + // Compares the input index with from_size + bool + operator< (octave_idx_type from_size) const + { + return from_idx < from_size; + } +}; + +// Accumulate method for full matrices +// Accumulator takes a dimension in its constructor. It supports methods +// void add(octave_idx_type, Elem) and octave_value_list post_process() +template +octave_value_list +full_accumulate (const Array& matrix, int dim) +{ + const dim_vector sizes = matrix.get_dims (); + dim_vector res_dims = sizes; + res_dims (dim) = 1; + // TODO Can I do this? Is it safe to mutate a dim_vector + // Does (res_dims = sizes) make a copy or copy the pointer? + Accumulator res (res_dims); + octave_idx_type from_numel = sizes.numel (); + octave_idx_type to_numel = res_dims.numel (); + for (excluded_indexer i (sizes, dim); i < from_numel; ++i) + { + res.add (i.get_to_idx (), + Elem (i.get_accum_idx (), matrix.xelem (i.get_from_idx ()))); + } + return res.post_process (); +} + +// Takes the product of the first dim dimensions of sizes +octave_idx_type +product_prefix (const dim_vector& sizes, int dim) +{ + assert(dim <= sizes.ndims ()); + octave_idx_type res = 1; + for (int i = 0; i < dim; ++i) + { + res *= sizes (i); + } + return res; +} + +template +octave_value_list +sparse_normal_accum (const Sparse& matrix) +{ + assert(accum_dir == 1 || accum_dir == 0); + dim_vector accum_dims = matrix.dims (); + octave_idx_type idx_size = accum_dims (1 - accum_dir); + octave_idx_type accum_size = accum_dims (accum_dir); + accum_dims (accum_dir) = 1; + Accumulator res (accum_dims); + std::vector known_mex (idx_size, 0); + for (octave_idx_type col = 0; col < matrix.cols (); ++col) + { + for (octave_idx_type i = matrix.cidx (col); i < matrix.cidx (col + 1); + ++i) + { + octave_idx_type res_idx; + octave_idx_type accum_idx; + if (accum_dir == COL_ACCUM) + { + res_idx = col; + accum_idx = matrix.ridx (i); + } + else if (accum_dir == ROW_ACCUM) + { + res_idx = matrix.ridx (i); + accum_idx = col; + } + else + { + assert(false); + } + if (known_mex[res_idx] != (octave_idx_type) (-1)) + { + if (accum_idx == known_mex[res_idx]) + ++known_mex[res_idx]; + else + { + assert(accum_idx > known_mex[res_idx]); + res.add (res_idx, Elem (known_mex[res_idx], 0)); + known_mex[res_idx] = (octave_idx_type) (-1); + } + } + res.add (res_idx, Elem (accum_idx, matrix.data[i])); + } + } + for (octave_idx_type i = 0; i < idx_size; ++i) + { + if (known_mex[i] != (octave_idx_type) (-1) && known_mex[i] < accum_size) + { + res.add (i, Elem (known_mex[i], 0)); + } + } + return res.post_process (); +} + +// Knows everything about an index-value pair +// in a sparse matrix (row, col, nnz_idx, value) +template +struct SparseElem +{ + const octave_idx_type row; + const octave_idx_type col; + const E value; + const octave_idx_type idx; + + SparseElem (octave_idx_type row, octave_idx_type col, E value, + octave_idx_type idx) : + row (row), col (col), value (value), idx (idx) { } + + bool + operator< (const SparseElem rhs) const + { + return row < rhs.row || (row == rhs.row && col < rhs.col); + } +}; + +template +octave_value_list +tall_sparse_row_accum (const Sparse& matrix) +{ + TallAccumulator res; + + std::priority_queue > nextItem; + for (octave_idx_type col = 0; col < matrix.cols (); ++col) + { + octave_idx_type idx = matrix.cidx[col]; + nextItem.push ( + SparseElem (matrix.ridx[idx], col, matrix.data[idx], idx)); + } + + octave_idx_type current_row = -1; + + // mex is short for minimum excluded value. + // (meaning the smallest nonnegative integer which is not + // in a set. accum_mex is the mex of the columns containing + // nonzero elements in the current row. + // It's uncommon to find a use for the function + // outside of combinatoric game theory. In this case, + // a row contains a zero if accum_mex < width. + octave_idx_type accum_mex; + + // It's either macro or copy-paste. I chose the lesser + // of two evils. If you don't like it, learn Lisp. + // *** +#define finish_row \ + if (current_row >= 0 \ + && accum_mex >= 0 \ + && accum_mex < matrix.cols ()) \ + res.join_term (Elem (accum_mex, 0)); \ + + while (!nextItem.empty ()) + { + SparseElem item = nextItem.front (); + nextItem.pop (); + if (item.row != current_row) + { + // macro; see *** above + finish_row; + + current_row = item.row; + if (item.col > 0) + { + res.add_term (item.row, Elem (0, 0)); + accum_mex = -1; + } + else + accum_mex = 1; + res.add_term (item.row, Elem (item.col, item.value)); + } + else + { + if (accum_mex != (octave_idx_type) (-1)) + { + if (item.col == accum_mex) + ++accum_mex; + else + { + res.join_term (Elem (accum_mex, 0)); + accum_mex = -1; + } + } + res.join_term (Elem (item.col, item.value)); + } + + octave_idx_type idx = item.idx + 1; + octave_idx_type col = item.col; + if (idx < matrix.cidx[col + 1]) + { + nextItem.push ( + SparseElem (matrix.ridx[idx], col, matrix.data[idx], idx)); + } + } + + // macro; see *** above + finish_row; + +#undef finish_row + + return res.post_process (); +}