changeset 13139:aa4a23337a0f

Enable BSX in-place for missing assignment operators * bsxfun-defs.cc (do_inplace_bsxfun_op): New function. * bsxfun.h (is_valid_bsxfun): Fix logic, had bug with empty dimensions. (is_valid_inplace_bsxfun): New function. * mx-inlines.cc (DEFMXBOOLOPEQ): Add missing function for vector-by-scalar operation. (do_mm_inplace_op): Call new inplace_bsxfun functions. * MArray.cc (MArray::operator+=, MArray::operator-=, MArray::product_eq, MArray::quotient_eq): Change calling form for do_mm_inplace_op. * boolNDArray.cc (boolNDArray::mx_el_and_assign, boolNDArray::mx_el_or_assign): Ditto.
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Thu, 15 Sep 2011 05:11:46 -0500
parents 52c5799130c2
children 98d23b0f16e1
files liboctave/MArray.cc liboctave/boolNDArray.cc liboctave/bsxfun-defs.cc liboctave/bsxfun.h liboctave/mx-inlines.cc
diffstat 5 files changed, 115 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/liboctave/MArray.cc	Wed Sep 14 19:59:24 2011 -0400
+++ b/liboctave/MArray.cc	Thu Sep 15 05:11:46 2011 -0500
@@ -264,7 +264,7 @@
   if (a.is_shared ())
     a = a + b;
   else
-    do_mm_inplace_op<T, T> (a, b, mx_inline_add2, "+=");
+    do_mm_inplace_op<T, T> (a, b, mx_inline_add2, mx_inline_add2, "+=");
   return a;
 }
 
@@ -275,7 +275,7 @@
   if (a.is_shared ())
     a = a - b;
   else
-    do_mm_inplace_op<T, T> (a, b, mx_inline_sub2, "-=");
+    do_mm_inplace_op<T, T> (a, b, mx_inline_sub2, mx_inline_sub2, "-=");
   return a;
 }
 
@@ -287,7 +287,7 @@
   if (a.is_shared ())
     return a = product (a, b);
   else
-    do_mm_inplace_op<T, T> (a, b, mx_inline_mul2, ".*=");
+    do_mm_inplace_op<T, T> (a, b, mx_inline_mul2, mx_inline_mul2, ".*=");
   return a;
 }
 
@@ -298,7 +298,7 @@
   if (a.is_shared ())
     return a = quotient (a, b);
   else
-    do_mm_inplace_op<T, T> (a, b, mx_inline_div2, "./=");
+    do_mm_inplace_op<T, T> (a, b, mx_inline_div2, mx_inline_div2, "./=");
   return a;
 }
 
--- a/liboctave/boolNDArray.cc	Wed Sep 14 19:59:24 2011 -0400
+++ b/liboctave/boolNDArray.cc	Thu Sep 15 05:11:46 2011 -0500
@@ -149,7 +149,8 @@
   if (a.is_shared ())
     a = mx_el_and (a, b);
   else
-    do_mm_inplace_op<bool, bool> (a, b, mx_inline_and2, "operator &=");
+    do_mm_inplace_op<bool, bool> (a, b, mx_inline_and2, mx_inline_and2,
+                                  "operator &=");
 
   return a;
 }
@@ -160,7 +161,8 @@
   if (a.is_shared ())
     a = mx_el_or (a, b);
   else
-    do_mm_inplace_op<bool, bool> (a, b, mx_inline_or2, "operator |=");
+    do_mm_inplace_op<bool, bool> (a, b, mx_inline_or2, mx_inline_or2,
+                                  "operator |=");
 
   return a;
 }
--- a/liboctave/bsxfun-defs.cc	Wed Sep 14 19:59:24 2011 -0400
+++ b/liboctave/bsxfun-defs.cc	Thu Sep 15 05:11:46 2011 -0500
@@ -134,6 +134,77 @@
   return retval;
 }
 
+// Apply R op= X in place with singleton expansion of X only; R keeps its
+// shape.  op_vv updates a run of R from a matching run of X, op_vs from a
+// single X value.  Shapes are not checked here: the caller is expected to
+// have validated them (do_mm_inplace_op uses is_valid_inplace_bsxfun).
+template <class R, class X>
+void
+do_inplace_bsxfun_op (Array<R>& r, const Array<X>& x,
+                      void (*op_vv) (size_t, R *, const X *),
+                      void (*op_vs) (size_t, R *, X))
+{
+  dim_vector dvr = r.dims (), dvx = x.dims ();
+  octave_idx_type nd = r.ndims ();
+  dvx.redim (nd);
+  const X* xvec = x.fortran_vec ();
+  R* rvec = r.fortran_vec ();
+
+  // Fold the common leading dimensions.
+  octave_idx_type start, ldr = 1;
+  for (start = 0; start < nd; start++)
+    {
+      if (dvr(start) != dvx(start))
+        break;
+      ldr *= dvr(start);
+    }
+
+  if (r.is_empty ())
+    ; // do nothing
+  else if (start == nd)
+    op_vv (r.numel (), rvec, xvec);
+  else
+    {
+      // Determine the type of the low-level loop.
+      bool xsing = false;
+      if (ldr == 1)
+        {
+          xsing = dvx(start) == 1;
+          if (xsing)
+            {
+              ldr *= dvr(start) * dvx(start);
+              start++;
+            }
+        }
+
+      dim_vector cdvx = dvx.cumulative ();
+      // Nullify singleton dims for a spread effect (std::max needs one type).
+      for (int i = std::max (start, octave_idx_type (1)); i < nd; i++)
+        {
+          if (dvx(i) == 1)
+            cdvx(i-1) = 0;
+        }
+
+      octave_idx_type niter = dvr.numel (start);
+      // The index array.
+      OCTAVE_LOCAL_BUFFER_INIT (octave_idx_type, idx, nd, 0);
+      for (octave_idx_type iter = 0; iter < niter; iter++)
+        {
+          octave_quit ();
+          // Compute indices.  FIXME: performance impact noticeable?
+          octave_idx_type xidx = cdvx.cum_compute_index (idx);
+          octave_idx_type ridx = dvr.compute_index (idx);
+          // Apply the low-level loop.
+          if (xsing)
+            op_vs (ldr, rvec + ridx, xvec[xidx]);
+          else
+            op_vv (ldr, rvec + ridx, xvec + xidx);
+
+          dvr.increment_index (idx + start, start);
+        }
+    }
+}
+
 #define BSXFUN_OP_DEF(OP, ARRAY) \
 ARRAY bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)
 
--- a/liboctave/bsxfun.h	Wed Sep 14 19:59:24 2011 -0400
+++ b/liboctave/bsxfun.h	Thu Sep 15 05:11:46 2011 -0500
@@ -33,7 +33,31 @@
 {
   for (int i = 0; i < std::min (dx.length (), dy.length ()); i++)
     {
-      if ( dx(i) > 1 && dy(i) > 1 && dx(i) != dy(i))
+      octave_idx_type xk = dx(i), yk = dy(i);
+      // Check the three conditions for valid bsxfun dims
+      if (! ( (xk == yk) || (xk == 1 && yk > 1) || (xk > 1 && yk == 1)))
+        return false;
+    }
+  return true;
+}
+
+// In-place variant: the assigned-to array (dims DR) cannot be resized, so
+// only DX may be singleton-expanded.  DX may have fewer dims than DR; the
+// missing trailing ones are treated as singletons.
+inline
+bool
+is_valid_inplace_bsxfun (const dim_vector& dr, const dim_vector& dx)
+{
+  octave_idx_type drl = dr.length (), dxl = dx.length ();
+  if (drl < dxl)
+    return false;
+
+  for (int i = 0; i < drl; i++)
+    {
+      // Do not index dx past its own length when it is shorter than dr.
+      octave_idx_type rk = dr(i), xk = (i < dxl ? dx(i) : 1);
+      // Only two valid conditions to check; can't stretch rk
+      if (! ( (rk == xk) || (rk > 1 && xk == 1)))
         return false;
     }
   return true;
--- a/liboctave/mx-inlines.cc	Wed Sep 14 19:59:24 2011 -0400
+++ b/liboctave/mx-inlines.cc	Thu Sep 15 05:11:46 2011 -0500
@@ -169,6 +169,9 @@
   for (size_t i = 0; i < n; i++) \
     r[i] OP logical_value (x[i]); \
 } \
+template <class X> \
+inline void F (size_t n, bool *r, X x) throw () \
+{ for (size_t i = 0; i < n; i++) r[i] OP x; }
 
 DEFMXBOOLOPEQ (mx_inline_and2, &=)
 DEFMXBOOLOPEQ (mx_inline_or2, |=)
@@ -391,11 +394,18 @@
 inline Array<R>&
 do_mm_inplace_op (Array<R>& r, const Array<X>& x,
                   void (*op) (size_t, R *, const X *) throw (),
+                  void (*op1) (size_t, R *, X) throw (),
                   const char *opname)
 {
   dim_vector dr = r.dims (), dx = x.dims ();
   if (dr == dx)
-    op (r.length (), r.fortran_vec (), x.data ());
+    {
+      op (r.length (), r.fortran_vec (), x.data ());
+    }
+  else if (is_valid_inplace_bsxfun (dr, dx))
+    {
+      do_inplace_bsxfun_op (r, x, op, op1);
+    }
   else
     gripe_nonconformant (opname, dr, dx);
   return r;