changeset 8931:92dd386f0f13

optimize diag matrix division
author Jaroslav Hajek <highegg@gmail.com>
date Sat, 07 Mar 2009 22:09:25 +0100
parents dd11de67a3f9
children 2d0f8692a82e
files src/ChangeLog src/xdiv.cc
diffstat 2 files changed, 53 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Sat Mar 07 15:12:25 2009 -0500
+++ b/src/ChangeLog	Sat Mar 07 22:09:25 2009 +0100
@@ -1,3 +1,8 @@
+2009-03-07  Jaroslav Hajek  <highegg@gmail.com>
+
+	* xdiv.cc (mdm_div_impl, dmm_lelftdiv_impl, dmdm_div_impl,
+	dmdm_leftdiv_impl): Optimize.
+
 2009-03-07  John W. Eaton  <jwe@octave.org>
 
 	* pr-output.cc (octave_print_internal (std::ostream&,
--- a/src/xdiv.cc	Sat Mar 07 15:12:25 2009 -0500
+++ b/src/xdiv.cc	Sat Mar 07 22:09:25 2009 +0100
@@ -736,24 +736,29 @@
   if (! mx_div_conform (a, d))
     return MT ();
 
-  octave_idx_type m = a.rows (), n = d.rows (), k = d.cols ();
+  octave_idx_type m = a.rows (), n = d.rows (), l = d.length ();
   MT x (m, n);
-  const typename DMT::element_type zero = typename DMT::element_type ();
+  typedef typename DMT::element_type S;
+  typedef typename MT::element_type T;
+  const T *aa = a.data ();
+  const S *dd = d.data ();
+  T *xx = x.fortran_vec ();
 
-  for (octave_idx_type j = 0; j < n; j++)
+  for (octave_idx_type j = 0; j < l; j++)
     {
-      if (j < k && d(j, j) != zero)
-        {
-          for (octave_idx_type i = 0; i < m; i++)
-            x(i, j) = a(i, j) / d(j, j);
-        }
+      const S del = dd[j];
+      if (del != S ())
+        for (octave_idx_type i = 0; i < m; i++)
+          xx[i] = aa[i] / del;
       else
-        {
-          for (octave_idx_type i = 0; i < m; i++)
-            x(i, j) = zero;
-        }
+        for (octave_idx_type i = 0; i < m; i++)
+          xx[i] = T ();
+      aa += m; xx += m;
     }
 
+  for (octave_idx_type i = l*m; i < n*m; i++)
+    xx[i] = T ();
+
   return x;
 }
 
@@ -812,17 +817,21 @@
   if (! mx_leftdiv_conform (d, a))
     return MT ();
 
-  octave_idx_type m = d.cols (), n = a.cols (), k = d.rows ();
-  octave_idx_type mk = m < k ? m : k;
+  octave_idx_type m = d.cols (), n = a.cols (), k = a.rows (), l = d.length ();
   MT x (m, n);
-  const typename DMT::element_type zero = typename DMT::element_type ();
+  typedef typename DMT::element_type S;
+  typedef typename MT::element_type T;
+  const T *aa = a.data ();
+  const S *dd = d.data ();
+  T *xx = x.fortran_vec ();
 
   for (octave_idx_type j = 0; j < n; j++)
     {
-      for (octave_idx_type i = 0; i < mk; i++)
-        x(i, j) = d(i, i) != zero ? a(i, j) / d(i, i) : 0;
-      for (octave_idx_type i = mk; i < m; i++)
-        x(i, j) = zero;
+      for (octave_idx_type i = 0; i < l; i++)
+        xx[i] = dd[i] != S () ? aa[i] / dd[i] : T ();
+      for (octave_idx_type i = l; i < m; i++)
+        xx[i] = T ();
+      aa += k; xx += m;
     }
 
   return x;
@@ -886,17 +895,18 @@
     return MT ();
 
   octave_idx_type m = a.rows (), n = d.rows (), k = d.cols ();
-  octave_idx_type mn = m < n ? m : n;
+  octave_idx_type l = std::min (m, n), lk = std::min (l, k);
   MT x (m, n);
-  const typename DMT::element_type zero = typename DMT::element_type ();
+  typedef typename DMT::element_type S;
+  typedef typename MT::element_type T;
+  const T *aa = a.data ();
+  const S *dd = d.data ();
+  T *xx = x.fortran_vec ();
 
-  for (octave_idx_type j = 0; j < mn; j++)
-    {
-      if (j < k && d(j, j) != zero)
-        x(j, j) = a(j, j) / d(j, j);
-      else
-        x(j, j) = zero;
-    }
+  for (octave_idx_type i = 0; i < lk; i++)
+    xx[i] = dd[i] != S () ? aa[i] / dd[i] : T ();
+  for (octave_idx_type i = lk; i < l; i++)
+    xx[i] = T ();
 
   return x;
 }
@@ -957,17 +967,18 @@
     return MT ();
 
   octave_idx_type m = d.cols (), n = a.cols (), k = d.rows ();
-  octave_idx_type mn = m < n ? m : n;
+  octave_idx_type l = std::min (m, n), lk = std::min (l, k);
   MT x (m, n);
-  const typename DMT::element_type zero = typename DMT::element_type ();
+  typedef typename DMT::element_type S;
+  typedef typename MT::element_type T;
+  const T *aa = a.data ();
+  const S *dd = d.data ();
+  T *xx = x.fortran_vec ();
 
-  for (octave_idx_type j = 0; j < mn; j++)
-    {
-      if (j < k && d(j, j) != zero)
-        x(j, j) = a(j, j) / d(j, j);
-      else
-        x(j, j) = zero;
-    }
+  for (octave_idx_type i = 0; i < lk; i++)
+    xx[i] = dd[i] != S () ? aa[i] / dd[i] : T ();
+  for (octave_idx_type i = lk; i < l; i++)
+    xx[i] = T ();
 
   return x;
 }