# HG changeset patch # User Jaroslav Hajek # Date 1236592759 -3600 # Node ID c2099a4d12ead68ffdb19fb73061509008c4e25d # Parent 346fde2030b5a12fd83f9577b2bf2035c149f0d6 partially optimize accumarray diff -r 346fde2030b5 -r c2099a4d12ea liboctave/ChangeLog --- a/liboctave/ChangeLog Sun Mar 08 19:18:44 2009 +0100 +++ b/liboctave/ChangeLog Mon Mar 09 10:59:19 2009 +0100 @@ -1,3 +1,11 @@ +2009-03-08 Jaroslav Hajek + + * idx-vector.h (idx_vector::bloop): loop --> bloop. + (idx_vector::loop): New method. + * MArray.cc (MArray::idx_add (cons idx_vector&, T)) + (MArray::idx_add (cons idx_vector&, const MArray&)): New methods. + * MArray.h: Declare them. + 2009-03-05 Jason Riedy * Sparse.h (Sparse::elt_type): Remove typedef, replace with: diff -r 346fde2030b5 -r c2099a4d12ea liboctave/MArray.cc --- a/liboctave/MArray.cc Sun Mar 08 19:18:44 2009 +0100 +++ b/liboctave/MArray.cc Mon Mar 09 10:59:19 2009 +0100 @@ -2,6 +2,7 @@ Copyright (C) 1993, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005, 2007, 2008 John W. Eaton +Copyright (C) 2009 VZLU Prague This file is part of Octave. @@ -53,6 +54,62 @@ return 0; } +template +struct _idxadds_helper +{ + T *array; + T val; + _idxadds_helper (T *a, T v) : array (a), val (v) { } + void operator () (octave_idx_type i) + { array[i] += val; } +}; + +template +struct _idxadda_helper +{ + T *array; + const T *vals; + _idxadda_helper (T *a, const T *v) : array (a), vals (v) { } + void operator () (octave_idx_type i) + { array[i] += *vals++; } +}; + +template +void +MArray::idx_add (const idx_vector& idx, T val) +{ + octave_idx_type n = this->length (); + octave_idx_type ext = idx.extent (n); + if (ext > n) + { + this->resize (ext); + n = ext; + } + + OCTAVE_QUIT; + + octave_idx_type len = idx.length (n); + idx.loop (len, _idxadds_helper (this->fortran_vec (), val)); +} + +template +void +MArray::idx_add (const idx_vector& idx, const MArray& vals) +{ + octave_idx_type n = this->length (); + octave_idx_type ext = idx.extent (n); + if (ext > n) + { + this->resize (ext); + n = ext; + } + + OCTAVE_QUIT; + + octave_idx_type len = std::min (idx.length (n), vals.length ()); + idx.loop (len, _idxadda_helper (this->fortran_vec (), vals.data ())); +} + // Element by element MArray by scalar ops. template diff -r 346fde2030b5 -r c2099a4d12ea liboctave/MArray.h --- a/liboctave/MArray.h Sun Mar 08 19:18:44 2009 +0100 +++ b/liboctave/MArray.h Mon Mar 09 10:59:19 2009 +0100 @@ -92,6 +92,12 @@ return Array::template map (fcn); } + // Performs indexed accumulative addition. + + void idx_add (const idx_vector& idx, T val); + + void idx_add (const idx_vector& idx, const MArray& vals); + // Currently, the OPS functions don't need to be friends, but that // may change. diff -r 346fde2030b5 -r c2099a4d12ea liboctave/idx-vector.h --- a/liboctave/idx-vector.h Sun Mar 08 19:18:44 2009 +0100 +++ b/liboctave/idx-vector.h Mon Mar 09 10:59:19 2009 +0100 @@ -657,19 +657,70 @@ return len; } + // Generic non-breakable indexed loop. The loop body should be encapsulated in a + // single functor body. + // This is equivalent to the following loop (but faster, at least for simple + // inlined bodies): + // + // for (octave_idx_type i = 0; i < idx->length (n); i++) body (idx(i)); + // + + template + void + loop (octave_idx_type n, Functor body) const + { + octave_idx_type len = rep->length (n); + switch (rep->idx_class ()) + { + case class_colon: + for (octave_idx_type i = 0; i < len; i++) body (i); + break; + case class_range: + { + idx_range_rep * r = dynamic_cast (rep); + octave_idx_type start = r->get_start (), step = r->get_step (); + octave_idx_type i, j; + if (step == 1) + for (i = start, j = start + len; i < j; i++) body (i); + else if (step == -1) + for (i = start, j = start - len; i > j; i--) body (i); + else + for (i = 0, j = start; i < len; i++, j += step) body (j); + } + break; + case class_scalar: + { + idx_scalar_rep * r = dynamic_cast (rep); + body (r->get_data ()); + } + break; + case class_vector: + { + idx_vector_rep * r = dynamic_cast (rep); + const octave_idx_type *data = r->get_data (); + for (octave_idx_type i = 0; i < len; i++) body (data[i]); + } + break; + default: + assert (false); + break; + } + + } + // Generic breakable indexed loop. The loop body should be encapsulated in a // single functor body. // This is equivalent to the following loop (but faster, at least for simple // inlined bodies): // // for (octave_idx_type i = 0; i < idx->length (n); i++) - // if (body (i, idx(i))) break; + // if (body (idx(i))) break; // return i; // template octave_idx_type - loop (octave_idx_type n, const Functor& body) const + bloop (octave_idx_type n, Functor body) const { octave_idx_type len = rep->length (n), ret; switch (rep->idx_class ()) @@ -704,7 +755,7 @@ case class_vector: { idx_vector_rep * r = dynamic_cast (rep); - octave_idx_type *data = r->get_data (); + const octave_idx_type *data = r->get_data (); octave_idx_type i; for (i = 0; i < len && body (data[i]); i++) ; ret = i; diff -r 346fde2030b5 -r c2099a4d12ea scripts/ChangeLog --- a/scripts/ChangeLog Sun Mar 08 19:18:44 2009 +0100 +++ b/scripts/ChangeLog Mon Mar 09 10:59:19 2009 +0100 @@ -1,3 +1,8 @@ +2009-03-09 Jaroslav Hajek + + * general/accumarray.m: Reorder tests. Call either "sparse" or + __accumarray_sum__ for the default summation case. + 2009-03-08 Søren Hauberg * statistics/base/histc.m: New function. diff -r 346fde2030b5 -r c2099a4d12ea scripts/general/accumarray.m --- a/scripts/general/accumarray.m Sun Mar 08 19:18:44 2009 +0100 +++ b/scripts/general/accumarray.m Mon Mar 09 10:59:19 2009 +0100 @@ -1,4 +1,5 @@ ## Copyright (C) 2007, 2008, 2009 David Bateman +## Copyright (C) 2009 VZLU Prague ## ## This file is part of Octave. ## @@ -64,20 +65,6 @@ endif ndims = size (subs, 2); - if (nargin < 3 || isempty (sz)) - sz = max (subs); - if (isscalar(sz)) - sz = [sz, 1]; - endif - elseif (length (sz) != ndims - && (ndims != 1 || length (sz) != 2 || sz(2) != 1)) - error ("accumarray: inconsistent dimensions"); - endif - - if (nargin < 4 || isempty (fun)) - fun = @sum; - endif - if (nargin < 5 || isempty (fillval)) fillval = 0; endif @@ -90,6 +77,71 @@ error ("accumarray: sparse matrices limited to 2 dimensions"); endif + if (nargin < 4 || isempty (fun)) + fun = @sum; + ## This is the fast summation case. Unlike the general case, + ## this case will be handled using an O(N) algorithm. + + if (isspar && fillval == 0) + ## The "sparse" function can handle this case. + + if ((nargin < 3 || isempty (sz))) + A = sparse (subs(:,1), subs(:,2), val); + elseif (length (sz) == 2) + A = sparse (subs(:,1), subs(:,2), val, sz(1), sz(2)); + else + error ("accumarray: dimensions mismatch") + endif + else + ## This case is handled by an internal function. + + if (ndims > 1) + if ((nargin < 3 || isempty (sz))) + sz = max (subs); + elseif (ndims != length (sz)) + error ("accumarray: dimensions mismatch") + elseif (any (max (subs) > sz)) + error ("accumarray: index out of range") + endif + + ## Convert multidimensional subscripts. + subs = sub2ind (sz, mat2cell (subs, rows (subs), ones (1, ndims)){:}); + elseif (nargin < 3) + ## In case of linear indexing, the fast built-in accumulator + ## will determine the extent for us. + sz = []; + endif + + ## Call the built-in accumulator. + if (isempty (sz)) + A = __accumarray_sum__ (subs, val); + else + A = __accumarray_sum__ (subs, val, prod (sz)); + ## set proper shape. + A = reshape (A, sz); + endif + + ## we fill in nonzero fill value. + if (fillval != 0) + mask = true (size (A)); + mask(subs) = false; + A(mask) = fillval; + endif + endif + + return + endif + + if (nargin < 3 || isempty (sz)) + sz = max (subs); + if (isscalar(sz)) + sz = [sz, 1]; + endif + elseif (length (sz) != ndims + && (ndims != 1 || length (sz) != 2 || sz(2) != 1)) + error ("accumarray: inconsistent dimensions"); + endif + [subs, idx] = sortrows (subs); if (isscalar (val)) diff -r 346fde2030b5 -r c2099a4d12ea src/ChangeLog --- a/src/ChangeLog Sun Mar 08 19:18:44 2009 +0100 +++ b/src/ChangeLog Mon Mar 09 10:59:19 2009 +0100 @@ -1,3 +1,8 @@ +2009-03-09 Jaroslav Hajek + + * data.cc (F__accumarray_sum__): New function. + (do_accumarray_sum): New helper template function. + 2009-03-07 Jaroslav Hajek * xdiv.cc (mdm_div_impl, dmm_lelftdiv_impl, dmdm_div_impl, diff -r 346fde2030b5 -r c2099a4d12ea src/data.cc --- a/src/data.cc Sun Mar 08 19:18:44 2009 +0100 +++ b/src/data.cc Mon Mar 09 10:59:19 2009 +0100 @@ -5724,6 +5724,78 @@ return retval; } +template +static NDT +do_accumarray_sum (const idx_vector& idx, const NDT& vals, + octave_idx_type n = -1) +{ + typedef typename NDT::element_type T; + if (n < 0) + n = idx.extent (0); + else if (idx.extent (n) > n) + error ("accumarray: index out of range"); + + // FIXME: the class tree in liboctave is overly complicated, hence the + // following type gymnastics. + MArray array; + + if (vals.numel () == 1) + { + array = MArray (n, T ()); + array.idx_add (idx, vals (0)); + } + else if (vals.length () == idx.length (n)) + { + array = MArray (n, T ()); + array.idx_add (idx, MArray (vals)); + } + else + error ("accumarray: dimensions mismatch"); + + return NDT (MArrayN (ArrayN (array))); +} + +DEFUN (__accumarray_sum__, args, , + "-*- texinfo -*-\n\ +@deftypefn {Built-in Function} {} __accumarray_sum__ (@var{idx}, @var{vals}, @var{n})\n\ +Undocumented internal function.\n\ +@end deftypefn") +{ + octave_value retval; + int nargin = args.length (); + if (nargin >= 2 && nargin <= 3 && args(0).is_numeric_type ()) + { + idx_vector idx = args(0).index_vector (); + octave_idx_type n = -1; + if (nargin == 3) + n = args(2).idx_type_value (true); + + if (! error_state) + { + octave_value vals = args(1); + if (vals.is_single_type ()) + { + if (vals.is_complex_type ()) + retval = do_accumarray_sum (idx, vals.float_complex_array_value (), n); + else + retval = do_accumarray_sum (idx, vals.float_array_value (), n); + } + else if (vals.is_numeric_type ()) + { + if (vals.is_complex_type ()) + retval = do_accumarray_sum (idx, vals.complex_array_value (), n); + else + retval = do_accumarray_sum (idx, vals.array_value (), n); + } + else + gripe_wrong_type_arg ("accumarray", vals); + } + } + else + print_usage (); + + return retval; +} /*