Mercurial > octave-dspies
view liboctave/util/accumulate.cc @ 19007:4f0d0b212fd6 draft
WIP
author | David Spies <dnspies@gmail.com> |
---|---|
date | Wed, 11 Jun 2014 16:59:46 -0600 |
parents | |
children |
line wrap: on
line source
/* Copyright (C) 2014 David Spies This file is part of Octave. Octave is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. Octave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Octave; see the file COPYING. If not, see <http://www.gnu.org/licenses/>. */ // A template for functions which accumulate all the values along // the rows or columns of a matrix into a single row or column vector // (more generally removing one dimension of a many-dimensional matrix) // Functions which should call this one: // min, max, sum, product, any, all // // TODO: Allow possible second return value for min and max #include "dim-vector.h" #include "Array.h" // TODO Support any/all with short-circuiting enum fun_type { min, max, sum, prod }; const bool MIN = false; const bool MAX = true; const int SUM = 0; const int PROD = 1; const int COL_ACCUM = 0; const int ROW_ACCUM = 1; // Pairs a value together with its index along the // accumulated dimension template<typename T> struct Elem { const octave_idx_type accum_index; const T value; Elem (octave_idx_type accum_index, T value) : accum_index (accum_index), value (value) { } }; //Chooses < or > based on template argument //(but only uses < operation) template<typename E, bool MinOrMax> bool exceeds (E v1, E v2) { return (MinOrMax == MIN) ? v1 < v2 : v2 < v1; } template<typename E, bool MinOrMax> struct FullMMAccum { public: Array<Elem<E> > res; Array<double> ind; FullMMAccum (const dim_vector& dims) : res (dims), ind (dims, 0) { } void add (octave_idx_type to_index, Elem<E> rhs) { if (ind (to_index) == 0 || exceeds<E, MinOrMax> (rhs.value, res (to_index))) { ind (to_index) = rhs.index + 1; res (to_index) = rhs.value; } } octave_value_list post_process (void) { return octave_value_list (res, ind); } }; // TODO Separate Sum and Prod // for short-circuiting Prod template<typename E, int SorP> struct FullSPAccum { Array<E> res; FullSPAccum (const dim_vector& dims) : res (dims, SorP) { } void add (octave_idx_type to_index, Elem<E> rhs) { if (SorP == PROD) res (to_index) *= rhs.value; else res (to_index) += rhs.value; } octave_value_list post_process () { return octave_value_list (res); } }; template<typename E, bool MinOrMax> struct SparseMMAccum { private: FullMMAccum<E, MinOrMax> my_accum; public: SparseMMAccum (dim_vector dims) : my_accum (dims) { } void add (octave_idx_type ind, Elem<E> value) { my_accum.add (ind, value); } octave_value_list post_process (void) { return octave_value_list (Sparse<E> (my_accum.res), my_accum.ind); } }; // TODO Separate Sum and Prod // for short-circuiting Prod template<typename E, int SorP> struct SparseSPAccum { private: FullSPAccum<E, SorP> my_accum; public: SparseSPAccum (dim_vector dims) : my_accum (dims) { } void add (octave_idx_type ind, Elem<E> value) { my_accum.add (ind, value); } octave_value_list post_process (void) { return octave_value_list (Sparse<E> (my_accum.res)); } }; template<typename E> octave_value_list sparseColumn (octave_idx_type height, std::vector<std::pair<octave_idx_type, E> > vec) { Sparse<E> res = Sparse<E> (height, 1, vec.size ()); res.xcidx (0) = 0; res.xcidx (1) = vec.size (); for (size_t i = 0; i < vec.size; ++i) { res.xridx (i) = vec[i].first; res.xdata (i) = vec[i].second; } return octave_value_list (res); } template<typename E, bool MinOrMax> struct TallMMAccum { private: const octave_idx_type height; std::vector<std::pair<octave_idx_type, E> > res_vec; public: TallMMAccum (octave_idx_type height) : height (height) { } void add_term (octave_idx_type row, Elem<E> value) { res_vec.push_back (std::pair (row, value.value)); } void join_term (Elem<E> value) { if (exceeds<E, MinOrMax> (value.value, res_vec.back ().second)) { res_vec.back ().second = value.value; } } octave_value_list post_process (void) { return sparseColumn (height, res_vec); } }; template<typename E, int SorP> struct TallSPAccum { private: const octave_idx_type height; std::vector<std::pair<octave_idx_type, E> > res_vec; public: TallSPAccum (octave_idx_type height) : height (height) { } void add_term (octave_idx_type row, Elem<E> value) { res_vec.push_back (std::pair (row, value.value)); } void join_term (Elem<E> value) { res_vec.back ().second += value.value; } octave_value_list post_process () { return sparseColumn (height, res_vec); } }; // M is the type of matrix. Right now I'm only handling // Array or Sparse. TODO What other types must I handle? template<typename M> octave_value_list accumulate (const M& matrix, int dim, int nargout, fun_type t) { if (dim < 0) { // TODO Raise exception: how? assert(false); } else if (dim >= matrix.ndims ()) return octave_value_list (matrix); // TODO Add is_sparse to SparseMatrix // Is there another way to check? else if (M::is_sparse) return select_sparse_accumulate<M::element_type> (matrix, dim, t, nargout); else return select_full_accumulate<M::element_type> (matrix, dim, t); } template<typename E> octave_value_list select_full_accumulate (const Array<E>& matrix, int dim, fun_type t) { // TODO Deal with h = 0 and w = 0 switch (t) { case min: return full_accumulate<E, FullMMAccum<E, MIN> > (matrix, dim); case max: return full_accumulate<E, FullMMAccum<E, MAX> > (matrix, dim); case sum: return full_accumulate<E, FullSPAccum<E, SUM> > (matrix, dim); case prod: return full_accumulate<E, FullSPAccum<E, PROD> > (matrix, dim); default: assert(false); // TODO Raise exception instead? } } // Change dim to a template parameter template<typename E> octave_value_list select_sparse_accumulate (const Sparse<E>& matrix, int dim, fun_type t, int nargout) { switch (dim) { case ROW_ACCUM: return select_sparse_accumulate<E, ROW_ACCUM> (matrix, t, nargout); case COL_ACCUM: return select_sparse_accumulate<E, COL_ACCUM> (matrix, t, nargout); default: // This was already checked by accumulate assert(false); } } // Accumulating over the columns of a sparse matrix is simple. // The rows are a little more complicated // There are two possible algorithms for accumulating over // the rows of a sparse matrix // the simple one runs in O(h+nnz) and just // constructs a full result vector (which can // be converted to a sparse vector). // The tricky one uses a heap to construct a sparse // result vector directly and runs in O(w+nnz*log(w)). // We need to look at matrix's dimensions to see // which one will work faster. If the matrix // is tall and sparse enough, we use the second one. template<typename E, int dim> octave_value_list select_sparse_accumulate (const Sparse<E>& matrix, fun_type t, int nargout) { octave_idx_type w = matrix.cols (); octave_idx_type h = matrix.rows (); octave_idx_type nnz = matrix.nnz (); //TODO Deal with h = 0 and w = 0 // For perfect optimality there should probably be some constants // before each of these terms (determined by extensive profiling). // But for big-O optimality, just making this comparison is good // enough. // Using float log because being precise isn't important. if (nargout == 1 && dim == ROW_ACCUM && w + nnz * log2f ((float) w) < h + nnz) { switch (t) { case min: return sparse_tall_row_accum<E, TallMMAccum<E, MIN> > (matrix); case max: return sparse_tall_row_accum<E, TallMMAccum<E, MAX> > (matrix); case sum: return sparse_tall_row_accum<E, TallSPAccum<E, SUM> > (matrix); case prod: return sparse_tall_row_accum<E, TallSPAccum<E, PROD> > (matrix); } } else { switch (t) { case min: return sparse_normal_accum<E, SparseMMAccum<E, MIN>, dim> (matrix); case max: return sparse_normal_accum<E, SparseMMAccum<E, MAX>, dim> (matrix); case sum: return sparse_normal_accum<E, SparseSPAccum<E, SUM>, dim> (matrix); case prod: return sparse_normal_accum<E, SparseSPAccum<E, PROD>, dim> (matrix); } } } // excluded_indexer takes a dim_vector v and an "excluded dimension" d // and iterates over the indices 0 to n (where n = v.numel()) while // simultaneously maintaining an iterator over (v excluding d) meaning // the index resulting from removing the dth element from v. // ex. let v = [3,4,5] and d = 1 // then we can use an excluded_indexer to iterate over the pairs // from_idx to_idx // // 0 0 (= [0,0,0], [0,0]) // 1 1 (= [1,0,0], [1,0]) // 2 2 (= [2,0,0], [2,0]) // 3 0 (= [0,1,0], [0,0]) // 4 1 (= [1,1,0], [1,0]) // 5 2 (= [2,1,0], [2,0]) // 6 0 (= [0,2,0], [0,0]) // 7 1 (= [1,2,0], [1,0]) // 8 2 (= [2,2,0], [2,0]) // 9 0 (= [0,3,0], [0,0]) // 10 1 (= [1,3,0], [1,0]) // 11 2 (= [2,3,0], [2,0]) // 12 3 (= [0,0,1], [0,1]) // 13 4 (= [1,0,1], [1,1]) // 14 5 (= [2,0,1], [2,1]) // 15 3 (= [0,1,1], [0,1]) // ...etc... // 58 13 (= [1,3,4], [1,3]) // 59 14 (= [2,3,4], [2,4]) // // This is done extremely efficiently using only incrementing and subtraction // (no multiplication,division modulo etc.) and is the most spatial-locality // friendly way to iterate over the two matrices. struct excluded_indexer { private: const octave_idx_type before_size; const octave_idx_type between_size; octave_idx_type from_idx; octave_idx_type to_idx; octave_idx_type before_ctr; octave_idx_type between_ctr; public: excluded_indexer (const dim_vector& sizes, int dim) : before_size (product_prefix (sizes, dim)), between_size (sizes (dim)), from_idx (0), to_idx (0), before_ctr (0), between_ctr (0) { } // Returns the index for the input (larger) matrix octave_idx_type get_from_idx (void) const { return from_idx; } // Returns the index for the output (smaller) matrix octave_idx_type get_to_idx (void) const { return to_idx; } octave_idx_type get_accum_idx (void) const { return between_ctr; } // Increments both indices and then subtracts from // the output index if necessary void operator++ (void) { ++from_idx; ++to_idx; ++before_ctr; if (before_ctr == before_size) { before_ctr = 0; ++between_ctr; if (between_ctr == between_size) between_ctr = 0; else to_idx -= before_size; } } // Compares the input index with from_size bool operator< (octave_idx_type from_size) const { return from_idx < from_size; } }; // Accumulate method for full matrices // Accumulator takes a dimension in its constructor. It supports methods // void add(octave_idx_type, Elem<E>) and octave_value_list post_process() template<typename E, typename Accumulator> octave_value_list full_accumulate (const Array<E>& matrix, int dim) { const dim_vector sizes = matrix.get_dims (); dim_vector res_dims = sizes; res_dims (dim) = 1; // TODO Can I do this? Is it safe to mutate a dim_vector // Does (res_dims = sizes) make a copy or copy the pointer? Accumulator res (res_dims); octave_idx_type from_numel = sizes.numel (); octave_idx_type to_numel = res_dims.numel (); for (excluded_indexer i (sizes, dim); i < from_numel; ++i) { res.add (i.get_to_idx (), Elem<E> (i.get_accum_idx (), matrix.xelem (i.get_from_idx ()))); } return res.post_process (); } // Takes the product of the first dim dimensions of sizes octave_idx_type product_prefix (const dim_vector& sizes, int dim) { assert(dim <= sizes.ndims ()); octave_idx_type res = 1; for (int i = 0; i < dim; ++i) { res *= sizes (i); } return res; } template<typename E, typename Accumulator, int accum_dir> octave_value_list sparse_normal_accum (const Sparse<E>& matrix) { assert(accum_dir == 1 || accum_dir == 0); dim_vector accum_dims = matrix.dims (); octave_idx_type idx_size = accum_dims (1 - accum_dir); octave_idx_type accum_size = accum_dims (accum_dir); accum_dims (accum_dir) = 1; Accumulator res (accum_dims); std::vector<octave_idx_type> known_mex (idx_size, 0); for (octave_idx_type col = 0; col < matrix.cols (); ++col) { for (octave_idx_type i = matrix.cidx (col); i < matrix.cidx (col + 1); ++i) { octave_idx_type res_idx; octave_idx_type accum_idx; if (accum_dir == COL_ACCUM) { res_idx = col; accum_idx = matrix.ridx (i); } else if (accum_dir == ROW_ACCUM) { res_idx = matrix.ridx (i); accum_idx = col; } else { assert(false); } if (known_mex[res_idx] != (octave_idx_type) (-1)) { if (accum_idx == known_mex[res_idx]) ++known_mex[res_idx]; else { assert(accum_idx > known_mex[res_idx]); res.add (res_idx, Elem<E> (known_mex[res_idx], 0)); known_mex[res_idx] = (octave_idx_type) (-1); } } res.add (res_idx, Elem<E> (accum_idx, matrix.data[i])); } } for (octave_idx_type i = 0; i < idx_size; ++i) { if (known_mex[i] != (octave_idx_type) (-1) && known_mex[i] < accum_size) { res.add (i, Elem<E> (known_mex[i], 0)); } } return res.post_process (); } // Knows everything about an index-value pair // in a sparse matrix (row, col, nnz_idx, value) template<typename E> struct SparseElem { const octave_idx_type row; const octave_idx_type col; const E value; const octave_idx_type idx; SparseElem (octave_idx_type row, octave_idx_type col, E value, octave_idx_type idx) : row (row), col (col), value (value), idx (idx) { } bool operator< (const SparseElem<E> rhs) const { return row < rhs.row || (row == rhs.row && col < rhs.col); } }; template<typename E, typename TallAccumulator> octave_value_list tall_sparse_row_accum (const Sparse<E>& matrix) { TallAccumulator res; std::priority_queue<SparseElem<E> > nextItem; for (octave_idx_type col = 0; col < matrix.cols (); ++col) { octave_idx_type idx = matrix.cidx[col]; nextItem.push ( SparseElem<E> (matrix.ridx[idx], col, matrix.data[idx], idx)); } octave_idx_type current_row = -1; // mex is short for minimum excluded value. // (meaning the smallest nonnegative integer which is not // in a set. accum_mex is the mex of the columns containing // nonzero elements in the current row. // It's uncommon to find a use for the function // outside of combinatoric game theory. In this case, // a row contains a zero if accum_mex < width. octave_idx_type accum_mex; // It's either macro or copy-paste. I chose the lesser // of two evils. If you don't like it, learn Lisp. // *** #define finish_row \ if (current_row >= 0 \ && accum_mex >= 0 \ && accum_mex < matrix.cols ()) \ res.join_term (Elem<E> (accum_mex, 0)); \ while (!nextItem.empty ()) { SparseElem<E> item = nextItem.front (); nextItem.pop (); if (item.row != current_row) { // macro; see *** above finish_row; current_row = item.row; if (item.col > 0) { res.add_term (item.row, Elem<E> (0, 0)); accum_mex = -1; } else accum_mex = 1; res.add_term (item.row, Elem<E> (item.col, item.value)); } else { if (accum_mex != (octave_idx_type) (-1)) { if (item.col == accum_mex) ++accum_mex; else { res.join_term (Elem<E> (accum_mex, 0)); accum_mex = -1; } } res.join_term (Elem<E> (item.col, item.value)); } octave_idx_type idx = item.idx + 1; octave_idx_type col = item.col; if (idx < matrix.cidx[col + 1]) { nextItem.push ( SparseElem<E> (matrix.ridx[idx], col, matrix.data[idx], idx)); } } // macro; see *** above finish_row; #undef finish_row return res.post_process (); }