changeset 8934:c2099a4d12ea

partially optimize accumarray
author Jaroslav Hajek <highegg@gmail.com>
date Mon, 09 Mar 2009 10:59:19 +0100
parents 346fde2030b5
children cae073411b03
files liboctave/ChangeLog liboctave/MArray.cc liboctave/MArray.h liboctave/idx-vector.h scripts/ChangeLog scripts/general/accumarray.m src/ChangeLog src/data.cc
diffstat 8 files changed, 273 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/liboctave/ChangeLog	Sun Mar 08 19:18:44 2009 +0100
+++ b/liboctave/ChangeLog	Mon Mar 09 10:59:19 2009 +0100
@@ -1,3 +1,11 @@
+2009-03-08  Jaroslav Hajek  <highegg@gmail.com>
+
+	* idx-vector.h (idx_vector::bloop): loop --> bloop.
+	(idx_vector::loop): New method.
+	* MArray.cc (MArray<T>::idx_add (cons idx_vector&, T))
+	(MArray<T>::idx_add (cons idx_vector&, const MArray<T>&)): New methods.
+	* MArray.h: Declare them.
+
 2009-03-05  Jason Riedy  <jason@acm.org>
 
 	* Sparse.h (Sparse<T>::elt_type): Remove typedef, replace with:
--- a/liboctave/MArray.cc	Sun Mar 08 19:18:44 2009 +0100
+++ b/liboctave/MArray.cc	Mon Mar 09 10:59:19 2009 +0100
@@ -2,6 +2,7 @@
 
 Copyright (C) 1993, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005,
               2007, 2008 John W. Eaton
+Copyright (C) 2009 VZLU Prague              
 
 This file is part of Octave.
 
@@ -53,6 +54,62 @@
   return 0;
 }
 
+template <class T>
+struct _idxadds_helper
+{
+  T *array;
+  T val;
+  _idxadds_helper (T *a, T v) : array (a), val (v) { }
+  void operator () (octave_idx_type i)
+    { array[i] += val; }
+};
+
+template <class T>
+struct _idxadda_helper
+{
+  T *array;
+  const T *vals;
+  _idxadda_helper (T *a, const T *v) : array (a), vals (v) { }
+  void operator () (octave_idx_type i)
+    { array[i] += *vals++; }
+};
+
+template <class T>
+void
+MArray<T>::idx_add (const idx_vector& idx, T val)
+{
+  octave_idx_type n = this->length ();
+  octave_idx_type ext = idx.extent (n);
+  if (ext > n)
+    {
+      this->resize (ext);
+      n = ext;
+    }
+
+  OCTAVE_QUIT;
+
+  octave_idx_type len = idx.length (n);
+  idx.loop (len, _idxadds_helper<T> (this->fortran_vec (), val));
+}
+
+template <class T>
+void
+MArray<T>::idx_add (const idx_vector& idx, const MArray<T>& vals)
+{
+  octave_idx_type n = this->length ();
+  octave_idx_type ext = idx.extent (n);
+  if (ext > n)
+    {
+      this->resize (ext);
+      n = ext;
+    }
+
+  OCTAVE_QUIT;
+
+  octave_idx_type len = std::min (idx.length (n), vals.length ());
+  idx.loop (len, _idxadda_helper<T> (this->fortran_vec (), vals.data ()));
+}
+
 // Element by element MArray by scalar ops.
 
 template <class T>
--- a/liboctave/MArray.h	Sun Mar 08 19:18:44 2009 +0100
+++ b/liboctave/MArray.h	Mon Mar 09 10:59:19 2009 +0100
@@ -92,6 +92,12 @@
     return Array<T>::template map<U> (fcn);
   }
 
+  // Performs indexed accumulative addition.
+
+  void idx_add (const idx_vector& idx, T val);
+
+  void idx_add (const idx_vector& idx, const MArray<T>& vals);
+
   // Currently, the OPS functions don't need to be friends, but that
   // may change.
 
--- a/liboctave/idx-vector.h	Sun Mar 08 19:18:44 2009 +0100
+++ b/liboctave/idx-vector.h	Mon Mar 09 10:59:19 2009 +0100
@@ -657,19 +657,70 @@
       return len;
     }
 
+  // Generic non-breakable indexed loop. The loop body should be encapsulated in a
+  // single functor body. 
+  // This is equivalent to the following loop (but faster, at least for simple
+  // inlined bodies):
+  //
+  // for (octave_idx_type i = 0; i < idx->length (n); i++) body (idx(i));
+  // 
+
+  template <class Functor>
+  void
+  loop (octave_idx_type n, Functor body) const
+    {
+      octave_idx_type len = rep->length (n);
+      switch (rep->idx_class ())
+        {
+        case class_colon:
+          for (octave_idx_type i = 0; i < len; i++) body (i);
+          break;
+        case class_range:
+          {
+            idx_range_rep * r = dynamic_cast<idx_range_rep *> (rep);
+            octave_idx_type start = r->get_start (), step = r->get_step ();
+            octave_idx_type i, j;
+            if (step == 1)
+              for (i = start, j = start + len; i < j; i++) body (i);
+            else if (step == -1)
+              for (i = start, j = start - len; i > j; i--) body (i);
+            else
+              for (i = 0, j = start; i < len; i++, j += step) body (j);
+          }
+          break;
+        case class_scalar:
+          {
+            idx_scalar_rep * r = dynamic_cast<idx_scalar_rep *> (rep);
+            body (r->get_data ());
+          }
+          break;
+        case class_vector:
+          {
+            idx_vector_rep * r = dynamic_cast<idx_vector_rep *> (rep);
+            const octave_idx_type *data = r->get_data ();
+            for (octave_idx_type i = 0; i < len; i++) body (data[i]);
+          }
+          break;
+        default:
+          assert (false);
+          break;
+        }
+
+    }
+
   // Generic breakable indexed loop. The loop body should be encapsulated in a
   // single functor body. 
   // This is equivalent to the following loop (but faster, at least for simple
   // inlined bodies):
   //
   // for (octave_idx_type i = 0; i < idx->length (n); i++)
-  //   if (body (i, idx(i))) break;
+  //   if (body (idx(i))) break;
   // return i;
   // 
 
   template <class Functor>
   octave_idx_type
-  loop (octave_idx_type n, const Functor& body) const
+  bloop (octave_idx_type n, Functor body) const
     {
       octave_idx_type len = rep->length (n), ret;
       switch (rep->idx_class ())
@@ -704,7 +755,7 @@
         case class_vector:
           {
             idx_vector_rep * r = dynamic_cast<idx_vector_rep *> (rep);
-            octave_idx_type *data = r->get_data ();
+            const octave_idx_type *data = r->get_data ();
             octave_idx_type i;
             for (i = 0; i < len && body (data[i]); i++) ;
             ret = i;
--- a/scripts/ChangeLog	Sun Mar 08 19:18:44 2009 +0100
+++ b/scripts/ChangeLog	Mon Mar 09 10:59:19 2009 +0100
@@ -1,3 +1,8 @@
+2009-03-09  Jaroslav Hajek  <highegg@gmail.com>
+
+	* general/accumarray.m: Reorder tests. Call either "sparse" or
+	__accumarray_sum__ for the default summation case.
+
 2009-03-08  Søren Hauberg <hauberg@gmail.com>
 
 	* statistics/base/histc.m: New function.
--- a/scripts/general/accumarray.m	Sun Mar 08 19:18:44 2009 +0100
+++ b/scripts/general/accumarray.m	Mon Mar 09 10:59:19 2009 +0100
@@ -1,4 +1,5 @@
 ## Copyright (C) 2007, 2008, 2009 David Bateman
+## Copyright (C) 2009 VZLU Prague
 ##
 ## This file is part of Octave.
 ##
@@ -64,20 +65,6 @@
   endif
   ndims = size (subs, 2);
 
-  if (nargin < 3 || isempty (sz))
-    sz = max (subs);
-    if (isscalar(sz))
-      sz = [sz, 1];
-    endif
-  elseif (length (sz) != ndims
-	  && (ndims != 1 || length (sz) != 2 || sz(2) != 1))
-    error ("accumarray: inconsistent dimensions");
-  endif
-  
-  if (nargin < 4 || isempty (fun))
-    fun = @sum;
-  endif
-
   if (nargin < 5 || isempty (fillval))
     fillval = 0;
   endif
@@ -90,6 +77,71 @@
     error ("accumarray: sparse matrices limited to 2 dimensions");
   endif
 
+  if (nargin < 4 || isempty (fun))
+    fun = @sum;
+    ## This is the fast summation case. Unlike the general case,
+    ## this case will be handled using an O(N) algorithm.
+
+    if (isspar && fillval == 0)
+      ## The "sparse" function can handle this case.
+
+      if ((nargin < 3 || isempty (sz)))
+        A = sparse (subs(:,1), subs(:,2), val);
+      elseif (length (sz) == 2)
+        A = sparse (subs(:,1), subs(:,2), val, sz(1), sz(2));
+      else
+        error ("accumarray: dimensions mismatch")
+      endif
+    else
+      ## This case is handled by an internal function.
+
+      if (ndims > 1)
+        if ((nargin < 3 || isempty (sz)))
+          sz = max (subs);
+        elseif (ndims != length (sz))
+          error ("accumarray: dimensions mismatch")
+        elseif (any (max (subs) > sz))
+          error ("accumarray: index out of range")
+        endif
+
+        ## Convert multidimensional subscripts.
+        subs = sub2ind (sz, mat2cell (subs, rows (subs), ones (1, ndims)){:});
+      elseif (nargin < 3)
+        ## In case of linear indexing, the fast built-in accumulator
+        ## will determine the extent for us.
+        sz = [];
+      endif
+
+      ## Call the built-in accumulator.
+      if (isempty (sz))
+        A = __accumarray_sum__ (subs, val);
+      else
+        A = __accumarray_sum__ (subs, val, prod (sz));
+        ## set proper shape.
+        A = reshape (A, sz);
+      endif
+
+      ## we fill in nonzero fill value.
+      if (fillval != 0)
+        mask = true (size (A));
+        mask(subs) = false;
+        A(mask) = fillval;
+      endif
+    endif
+
+    return
+  endif
+
+  if (nargin < 3 || isempty (sz))
+    sz = max (subs);
+    if (isscalar(sz))
+      sz = [sz, 1];
+    endif
+  elseif (length (sz) != ndims
+	  && (ndims != 1 || length (sz) != 2 || sz(2) != 1))
+    error ("accumarray: inconsistent dimensions");
+  endif
+  
   [subs, idx] = sortrows (subs);
 
   if (isscalar (val))
--- a/src/ChangeLog	Sun Mar 08 19:18:44 2009 +0100
+++ b/src/ChangeLog	Mon Mar 09 10:59:19 2009 +0100
@@ -1,3 +1,8 @@
+2009-03-09  Jaroslav Hajek  <highegg@gmail.com>
+
+	* data.cc (F__accumarray_sum__): New function.
+	(do_accumarray_sum): New helper template function.
+
 2009-03-07  Jaroslav Hajek  <highegg@gmail.com>
 
 	* xdiv.cc (mdm_div_impl, dmm_lelftdiv_impl, dmdm_div_impl,
--- a/src/data.cc	Sun Mar 08 19:18:44 2009 +0100
+++ b/src/data.cc	Mon Mar 09 10:59:19 2009 +0100
@@ -5724,6 +5724,78 @@
   return retval;
 }
 
+template <class NDT>
+static NDT 
+do_accumarray_sum (const idx_vector& idx, const NDT& vals,
+                   octave_idx_type n = -1)
+{
+  typedef typename NDT::element_type T;
+  if (n < 0)
+    n = idx.extent (0);
+  else if (idx.extent (n) > n)
+    error ("accumarray: index out of range");
+
+  // FIXME: the class tree in liboctave is overly complicated, hence the
+  // following type gymnastics.
+  MArray<T> array;
+
+  if (vals.numel () == 1)
+    {
+      array = MArray<T> (n, T ());
+      array.idx_add (idx, vals (0));
+    }
+  else if (vals.length () == idx.length (n))
+    {
+      array = MArray<T> (n, T ());
+      array.idx_add (idx, MArray<T> (vals));
+    }
+  else
+    error ("accumarray: dimensions mismatch");
+
+  return NDT (MArrayN<T> (ArrayN<T> (array)));
+}
+
+DEFUN (__accumarray_sum__, args, ,
+  "-*- texinfo -*-\n\
+@deftypefn {Built-in Function} {} __accumarray_sum__ (@var{idx}, @var{vals}, @var{n})\n\
+Undocumented internal function.\n\
+@end deftypefn")
+{
+  octave_value retval;
+  int nargin = args.length ();
+  if (nargin >= 2 && nargin <= 3 && args(0).is_numeric_type ())
+    {
+      idx_vector idx = args(0).index_vector ();
+      octave_idx_type n = -1;
+      if (nargin == 3)
+        n = args(2).idx_type_value (true);
+
+      if (! error_state)
+        {
+          octave_value vals = args(1);
+          if (vals.is_single_type ())
+            {
+              if (vals.is_complex_type ())
+                retval = do_accumarray_sum (idx, vals.float_complex_array_value (), n);
+              else
+                retval = do_accumarray_sum (idx, vals.float_array_value (), n);
+            }
+          else if (vals.is_numeric_type ())
+            {
+              if (vals.is_complex_type ())
+                retval = do_accumarray_sum (idx, vals.complex_array_value (), n);
+              else
+                retval = do_accumarray_sum (idx, vals.array_value (), n);
+            }
+          else
+            gripe_wrong_type_arg ("accumarray", vals);
+        }
+    }
+  else
+    print_usage ();
+
+  return retval;  
+}
 
 
 /*