changeset 10268:9a16a61ed43d

new optimizations for accumarray
author Jaroslav Hajek <highegg@gmail.com>
date Fri, 05 Feb 2010 12:09:21 +0100
parents 479c7df0cc96
children 217d36560dfa
files liboctave/ChangeLog liboctave/MArrayN.cc liboctave/MArrayN.h liboctave/lo-mappers.h scripts/ChangeLog scripts/general/accumarray.m src/ChangeLog src/data.cc
diffstat 8 files changed, 366 insertions(+), 76 deletions(-) [+]
line wrap: on
line diff
--- a/liboctave/ChangeLog	Thu Feb 04 14:36:49 2010 +0100
+++ b/liboctave/ChangeLog	Fri Feb 05 12:09:21 2010 +0100
@@ -1,3 +1,9 @@
+2010-02-05  Jaroslav Hajek  <highegg@gmail.com>
+
+	* MArrayN.cc (MArrayN::idx_min, MArrayN::idx_max): New methods.
+	* MArrayN.h: Declare them.
+	* lo-mappers.h (xmin, xmax): Define for general arguments.
+
 2010-02-04  Jaroslav Hajek  <highegg@gmail.com>
 
 	* chMatrix.h (charMatrix): Rebase directly on Array<char>.
--- a/liboctave/MArrayN.cc	Thu Feb 04 14:36:49 2010 +0100
+++ b/liboctave/MArrayN.cc	Fri Feb 05 12:09:21 2010 +0100
@@ -87,6 +87,52 @@
   idx.loop (len, _idxadda_helper<T> (this->fortran_vec (), vals.data ()));
 }
 
+template <class T, T op (typename ref_param<T>::type, typename ref_param<T>::type)>
+struct _idxbinop_helper
+{
+  T *array;
+  const T *vals;
+  _idxbinop_helper (T *a, const T *v) : array (a), vals (v) { }
+  void operator () (octave_idx_type i)
+    { array[i] = op (array[i], *vals++); }
+};
+
+template <class T>
+void
+MArrayN<T>::idx_min (const idx_vector& idx, const MArrayN<T>& vals)
+{
+  octave_idx_type n = this->length ();
+  octave_idx_type ext = idx.extent (n);
+  if (ext > n)
+    {
+      this->resize (ext);
+      n = ext;
+    }
+
+  octave_quit ();
+
+  octave_idx_type len = std::min (idx.length (n), vals.length ());
+  idx.loop (len, _idxbinop_helper<T, xmin> (this->fortran_vec (), vals.data ()));
+}
+
+template <class T>
+void
+MArrayN<T>::idx_max (const idx_vector& idx, const MArrayN<T>& vals)
+{
+  octave_idx_type n = this->length ();
+  octave_idx_type ext = idx.extent (n);
+  if (ext > n)
+    {
+      this->resize (ext);
+      n = ext;
+    }
+
+  octave_quit ();
+
+  octave_idx_type len = std::min (idx.length (n), vals.length ());
+  idx.loop (len, _idxbinop_helper<T, xmax> (this->fortran_vec (), vals.data ()));
+}
+
 // N-dimensional array with math ops.
 template <class T>
 void
--- a/liboctave/MArrayN.h	Thu Feb 04 14:36:49 2010 +0100
+++ b/liboctave/MArrayN.h	Fri Feb 05 12:09:21 2010 +0100
@@ -93,6 +93,10 @@
 
   void idx_add (const idx_vector& idx, const MArrayN<T>& vals);
 
+  void idx_min (const idx_vector& idx, const MArrayN<T>& vals);
+
+  void idx_max (const idx_vector& idx, const MArrayN<T>& vals);
+
   void changesign (void);
 };
 
--- a/liboctave/lo-mappers.h	Thu Feb 04 14:36:49 2010 +0100
+++ b/liboctave/lo-mappers.h	Fri Feb 05 12:09:21 2010 +0100
@@ -69,6 +69,14 @@
 extern OCTAVE_API bool octave_is_NA (double x);
 extern OCTAVE_API bool octave_is_NaN_or_NA (double x) GCC_ATTR_DEPRECATED;
 
+// Generic xmin, xmax definitions
+template <class T>
+inline T xmin (T x, T y)
+{ return x <= y ? x : y; }
+template <class T>
+inline T xmax (T x, T y)
+{ return x >= y ? x : y; }
+
 // This form is favorable. GCC will translate (x <= y ? x : y) without a jump,
 // hence the only conditional jump involved will be the first (xisnan), infrequent
 // and hence friendly to branch prediction.
--- a/scripts/ChangeLog	Thu Feb 04 14:36:49 2010 +0100
+++ b/scripts/ChangeLog	Fri Feb 05 12:09:21 2010 +0100
@@ -1,3 +1,9 @@
+2010-02-05  Jaroslav Hajek  <highegg@gmail.com>
+
+	* general/accumarray.m: Rewrite. Split sparse and dense case. Treat
+	cell-valued subs efficiently. Optimize dense case for @sum, @max and
+	@min. Optimize the @(x){x} reduction. Add tests.
+
 2010-02-04  Jaroslav Hajek  <highegg@gmail.com>
 
 	* miscellaneous/dir.m: Fix month passed to datenum.
--- a/scripts/general/accumarray.m	Thu Feb 04 14:36:49 2010 +0100
+++ b/scripts/general/accumarray.m	Fri Feb 05 12:09:21 2010 +0100
@@ -54,65 +54,107 @@
 ## @end example
 ## @end deftypefn
 
-function A = accumarray (subs, val, sz, func, fillval, isspar)  
+function A = accumarray (subs, val, sz = [], func = [], fillval = [], isspar = [])  
 
   if (nargin < 2 || nargin > 6)
     print_usage ();
   endif
 
   if (iscell (subs))
-    subs = cell2mat (cellfun (@(x) x(:), subs, "UniformOutput", false));
+    subs = cellfun (@(x) x(:), subs, "UniformOutput", false);
+    ndims = numel (subs);
+    if (ndims == 1)
+      subs = subs{1};
+    endif
+  else
+    ndims = columns (subs);
   endif
-  ndims = size (subs, 2);
 
-  if (nargin < 5 || isempty (fillval))
+  if (isempty (fillval))
     fillval = 0;
   endif
 
-  if (nargin < 6 || isempty (isspar))
+  if (isempty (isspar))
     isspar = false;
   endif
 
-  if (isspar && ndims > 2)
-    error ("accumarray: sparse matrices limited to 2 dimensions");
-  endif
+  if (isspar)
+
+    ## Sparse case. Avoid linearizing the subscripts, because it could overflow.
+
+    if (fillval != 0)
+      error ("accumarray: fillval must be zero in the sparse case");
+    endif
+
+    ## Ensure subscripts are a two-column matrix.
+    if (iscell (subs))
+      subs = [subs{:}];
+    endif
 
-  if (nargin < 4 || isempty (func))
-    func = @sum;
-    ## This is the fast summation case. Unlike the general case,
-    ## this case will be handled using an O(N) algorithm.
+    ## Validate dimensions.
+    if (ndims == 1)
+      subs(:,2) = 1;
+    elseif (ndims != 2)
+      error ("accumarray: in the sparse case, needs 1 or 2 subscripts");
+    endif
+
+    if (isnumeric (val) || islogical (val))
+      vals = double (val);
+    else
+      error ("accumarray: in the sparse case, values must be numeric or logical");
+    endif
+
+    if (! (isempty (func) || func == @sum))
 
-    if (isspar && fillval == 0)
-      ## The "sparse" function can handle this case.
+      ## Reduce values. This is not needed if we're about to sum them, because
+      ## "sparse" can do that.
+      
+      ## Sort indices.
+      [subs, idx] = sortrows (subs);
+      n = rows (subs);
+      ## Identify runs.
+      jdx = find (any (diff (subs, 1, 1), 2));
+      jdx = [jdx; n];
+
+      val = cellfun (func, mat2cell (val(:)(idx), diff ([0; jdx])));
+      subs = subs(jdx, :);
+    endif
 
-      if ((nargin < 3 || isempty (sz)))
-        A = sparse (subs(:,1), subs(:,2), val);
-      elseif (length (sz) == 2)
-        A = sparse (subs(:,1), subs(:,2), val, sz(1), sz(2));
-      else
+    ## Form the sparse matrix.
+    if (isempty (sz))
+      A = sparse (subs(:,1), subs(:,2), val);
+    elseif (length (sz) == 2)
+      A = sparse (subs(:,1), subs(:,2), val, sz(1), sz(2));
+    else
+      error ("accumarray: dimensions mismatch")
+    endif
+
+  else
+
+    ## Linearize subscripts.
+    if (ndims > 1)
+      if (isempty (sz))
+        if (iscell (subs))
+          sz = cellfun (@max, subs);
+        else
+          sz = max (subs, [], 1);
+        endif
+      elseif (ndims != length (sz))
         error ("accumarray: dimensions mismatch")
       endif
-    else
-      ## This case is handled by an internal function.
 
-      if (ndims > 1)
-        if ((nargin < 3 || isempty (sz)))
-          sz = max (subs);
-        elseif (ndims != length (sz))
-          error ("accumarray: dimensions mismatch")
-        elseif (any (max (subs) > sz))
-          error ("accumarray: index out of range")
-        endif
+      ## Convert multidimensional subscripts.
+      if (ismatrix (subs))
+        subs = num2cell (subs, 1);
+      endif
+      subs = sub2ind (sz, subs{:});
+    endif
 
-        ## Convert multidimensional subscripts.
-        subs = sub2ind (sz, num2cell (subs, 1){:});
-      elseif (nargin < 3)
-        ## In case of linear indexing, the fast built-in accumulator
-        ## will determine the extent for us.
-        sz = [];
-      endif
+
+    ## Some built-in reductions handled efficiently.
 
-      ## Call the built-in accumulator.
+    if (isempty (func) || func == @sum)
+      ## Fast summation.
       if (isempty (sz))
         A = __accumarray_sum__ (subs, val);
       else
@@ -127,47 +169,87 @@
         mask(subs) = false;
         A(mask) = fillval;
       endif
-    endif
+    elseif (func == @max)
+      ## Fast maximization.
 
-    return
-  endif
+      if (isinteger (val))
+        zero = intmin (class (val));
+      elseif (fillval == 0 && all (val(:) >= 0))
+        ## This is a common case - fillval is zero, all numbers nonegative.
+        zero = 0;
+      else
+        zero = NaN; # Neutral value.
+      endif
+
+      if (isempty (sz))
+        A = __accumarray_max__ (subs, val, zero);
+      else
+        A = __accumarray_max__ (subs, val, zero, prod (sz));
+        A = reshape (A, sz);
+      endif
 
-  if (nargin < 3 || isempty (sz))
-    sz = max (subs);
-    if (isscalar(sz))
-      sz = [sz, 1];
-    endif
-  elseif (length (sz) != ndims
-	  && (ndims != 1 || length (sz) != 2 || sz(2) != 1))
-    error ("accumarray: inconsistent dimensions");
-  endif
-  
-  [subs, idx] = sortrows (subs);
+      if (fillval != zero && isnan (fillval) != isnan (zero))
+        mask = true (size (A));
+        mask(subs) = false;
+        A(mask) = fillval;
+      endif
+    elseif (func == @min)
+      ## Fast minimization.
+
+      if (isinteger (val))
+        zero = intmax (class (val));
+      else
+        zero = NaN; # Neutral value.
+      endif
+
+      if (isempty (sz))
+        A = __accumarray_min__ (subs, val, zero);
+      else
+        A = __accumarray_min__ (subs, val, zero, prod (sz));
+        A = reshape (A, sz);
+      endif
 
-  if (isscalar (val))
-    val = repmat (size (idx));
-  else
-    val = val(idx);
-  endif
-  cidx = find ([true; (sum (abs (diff (subs)), 2) != 0)]);
-  idx = cell (1, ndims);
-  for i = 1:ndims
-    idx{i} = subs (cidx, i);
-  endfor
-  x = cellfun (func, mat2cell (val(:), diff ([cidx; length(val) + 1])));
-  if (isspar && fillval == 0)
-    A = sparse (idx{1}, idx{2}, x, sz(1), sz(2));
-  else
-    if (iscell (x))
-      ## Why did matlab choose to reverse the order of the elements
-      x = cellfun (@(x) flipud (x(:)), x, "UniformOutput", false);
-      A = cell (sz);
-    elseif (fillval == 0)
-      A = zeros (sz, class (x));
-    else 
-      A = fillval .* ones (sz);
+      if (fillval != zero && isnan (fillval) != isnan (zero))
+        mask = true (size (A));
+        mask(subs) = false;
+        A(mask) = fillval;
+      endif
+    else
+
+      ## The general case. Reduce values. 
+      n = rows (subs);
+      if (numel (val) == 1)
+        val = val(ones (1, n), 1);
+      else
+        val = val(:);
+      endif
+      
+      ## Sort indices.
+      [subs, idx] = sort (subs);
+      ## Identify runs.
+      jdx = find (diff (subs, 1, 1));
+      jdx = [jdx; n];
+      val = mat2cell (val(idx), diff ([0; jdx]));
+      ## Optimize the case when function is @(x) {x}, i.e. we just want to
+      ## collect the values to cells.
+      persistent simple_cell_str = func2str (@(x) {x});
+      if (! strcmp (func2str (func), simple_cell_str))
+        val = cellfun (func, val);
+      endif
+      subs = subs(jdx);
+
+      ## Construct matrix of fillvals.
+      if (iscell (val))
+        A = cell (sz);
+      elseif (fillval == 0)
+        A = zeros (sz, class (val));
+      else
+        A = repmat (fillval, sz);
+      endif
+
+      ## Set the reduced values.
+      A(subs) = val;
     endif
-    A(sub2ind (sz, idx{:})) = x;
   endif
 endfunction
 
@@ -183,4 +265,24 @@
 %!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x)length(x)>1),[false,false,false,false;true,false,true,false])
 %!test
 %! A = accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x){x});
-%! assert (A{2},[104;102])
+%! assert (A{2},[102;104])
+%!test
+%! subs = ceil (rand (2000, 3)*10);
+%! val = rand (2000, 1);
+%! assert (accumarray (subs, val, [], @max), accumarray (subs, val, [], @(x) max (x)));
+%!test
+%! subs = ceil (rand (2000, 1)*100);
+%! val = rand (2000, 1);
+%! assert (accumarray (subs, val, [100, 1], @min, NaN), accumarray (subs, val, [100, 1], @(x) min (x), NaN));
+%!test
+%! subs = ceil (rand (2000, 2)*30);
+%! subsc = num2cell (subs, 1);
+%! val = rand (2000, 1);
+%! assert (accumarray (subsc, val, [], [], 0, true), accumarray (subs, val, [], [], 0, true));
+%!test
+%! subs = ceil (rand (2000, 3)*10);
+%! subsc = num2cell (subs, 1);
+%! val = rand (2000, 1);
+%! assert (accumarray (subsc, val, [], @max), accumarray (subs, val, [], @max));
+
+
--- a/src/ChangeLog	Thu Feb 04 14:36:49 2010 +0100
+++ b/src/ChangeLog	Fri Feb 05 12:09:21 2010 +0100
@@ -1,3 +1,9 @@
+2010-02-05  Jaroslav Hajek  <highegg@gmail.com>
+
+	* data.cc (F__accumarray_sum__): Allow bool and char inputs.
+	(do_accumarray_minmax, do_accumarray_minmax_fun): New helper funcs.
+	(F__accumarray_min__, F__accumarray_max__): New defuns.
+
 2010-02-04  John W. Eaton  <jwe@octave.org>
 
 	* sysdep.cc: Don't include <sys/utsname.h>.
--- a/src/data.cc	Thu Feb 04 14:36:49 2010 +0100
+++ b/src/data.cc	Fri Feb 05 12:09:21 2010 +0100
@@ -6347,7 +6347,7 @@
               else
                 retval = do_accumarray_sum (idx, vals.float_array_value (), n);
             }
-          else if (vals.is_numeric_type ())
+          else if (vals.is_numeric_type () || vals.is_bool_type () || vals.is_string ())
             {
               if (vals.is_complex_type ())
                 retval = do_accumarray_sum (idx, vals.complex_array_value (), n);
@@ -6365,6 +6365,118 @@
 }
 
 template <class NDT>
+static NDT 
+do_accumarray_minmax (const idx_vector& idx, const NDT& vals,
+                      octave_idx_type n, bool ismin,
+                      const typename NDT::element_type& zero_val)
+{
+  typedef typename NDT::element_type T;
+  if (n < 0)
+    n = idx.extent (0);
+  else if (idx.extent (n) > n)
+    error ("accumarray: index out of range");
+
+  NDT retval (dim_vector (n, 1), zero_val);
+
+  // Pick minimizer or maximizer.
+  void (MArrayN<T>::*op) (const idx_vector&, const MArrayN<T>&) = 
+    ismin ? (&MArrayN<T>::idx_min) : (&MArrayN<T>::idx_max);
+
+  octave_idx_type l = idx.length (n);
+  if (vals.numel () == 1)
+    (retval.*op) (idx, NDT (dim_vector (l, 1), vals(0)));
+  else if (vals.numel () == l)
+    (retval.*op) (idx, vals);
+  else
+    error ("accumarray: dimensions mismatch");
+
+  return retval;
+}
+
+static octave_value_list
+do_accumarray_minmax_fun (const octave_value_list& args,
+                          bool ismin)
+{
+  octave_value retval;
+  int nargin = args.length ();
+  if (nargin >= 3 && nargin <= 4 && args(0).is_numeric_type ())
+    {
+      idx_vector idx = args(0).index_vector ();
+      octave_idx_type n = -1;
+      if (nargin == 4)
+        n = args(3).idx_type_value (true);
+
+      if (! error_state)
+        {
+          octave_value vals = args(1), zero = args (2);
+
+          switch (vals.builtin_type ())
+            {
+            case btyp_double:
+              retval = do_accumarray_minmax (idx, vals.array_value (), n, ismin,
+                                             zero.double_value ());
+              break;
+            case btyp_float:
+              retval = do_accumarray_minmax (idx, vals.float_array_value (), n, ismin,
+                                             zero.float_value ());
+              break;
+            case btyp_complex:
+              retval = do_accumarray_minmax (idx, vals.complex_array_value (), n, ismin,
+                                             zero.complex_value ());
+              break;
+            case btyp_float_complex:
+              retval = do_accumarray_minmax (idx, vals.float_complex_array_value (), n, ismin,
+                                             zero.float_complex_value ());
+              break;
+#define MAKE_INT_BRANCH(X) \
+            case btyp_ ## X: \
+              retval = do_accumarray_minmax (idx, vals.X ## _array_value (), n, ismin, \
+                                             zero.X ## _scalar_value ()); \
+              break
+
+            MAKE_INT_BRANCH (int8);
+            MAKE_INT_BRANCH (int16);
+            MAKE_INT_BRANCH (int32);
+            MAKE_INT_BRANCH (int64);
+            MAKE_INT_BRANCH (uint8);
+            MAKE_INT_BRANCH (uint16);
+            MAKE_INT_BRANCH (uint32);
+            MAKE_INT_BRANCH (uint64);
+#undef MAKE_INT_BRANCH
+            case btyp_bool:
+              retval = do_accumarray_minmax (idx, vals.array_value (), n, ismin,
+                                             zero.bool_value ());
+              break;
+            default:
+              gripe_wrong_type_arg ("accumarray", vals);
+            }
+        }
+    }
+  else
+    print_usage ();
+
+  return retval;  
+}
+
+DEFUN (__accumarray_min__, args, ,
+  "-*- texinfo -*-\n\
+@deftypefn {Built-in Function} {} __accumarray_min__ (@var{idx}, @var{vals}, @var{zero}, @var{n})\n\
+Undocumented internal function.\n\
+@end deftypefn")
+{
+  return do_accumarray_minmax_fun (args, true);
+}
+
+DEFUN (__accumarray_max__, args, ,
+  "-*- texinfo -*-\n\
+@deftypefn {Built-in Function} {} __accumarray_max__ (@var{idx}, @var{vals}, @var{zero}, @var{n})\n\
+Undocumented internal function.\n\
+@end deftypefn")
+{
+  return do_accumarray_minmax_fun (args, false);
+}
+
+template <class NDT>
 static NDT
 do_merge (const Array<bool>& mask,
           const NDT& tval, const NDT& fval)