diff libinterp/corefcn/kron.cc @ 25485:38a07d930ccd

Improve kron performance by 17% by making fewer calls to octave_quit() (bug #54005). * kron.cc: Move octave_quit() from the innermost of the triple "for" loops to the outermost loop. Ctrl+C responsiveness is not noticeably impacted, since humans expect results within hundreds of milliseconds.
author Rik <rik@octave.org>
date Wed, 20 Jun 2018 10:51:45 -0700
parents 6652d3823428
children 00f796120a6d
line wrap: on
line diff
--- a/libinterp/corefcn/kron.cc	Tue Jun 19 09:18:44 2018 -0700
+++ b/libinterp/corefcn/kron.cc	Wed Jun 20 10:51:45 2018 -0700
@@ -64,13 +64,17 @@
   T *cv = c.fortran_vec ();
 
   for (octave_idx_type ja = 0; ja < nca; ja++)
-    for (octave_idx_type jb = 0; jb < ncb; jb++)
-      for (octave_idx_type ia = 0; ia < nra; ia++)
+    {
+      octave_quit ();
+      for (octave_idx_type jb = 0; jb < ncb; jb++)
         {
-          octave_quit ();
-          mx_inline_mul (nrb, cv, a(ia, ja), b.data () + nrb*jb);
-          cv += nrb;
+          for (octave_idx_type ia = 0; ia < nra; ia++)
+            {
+              mx_inline_mul (nrb, cv, a(ia, ja), b.data () + nrb*jb);
+              cv += nrb;
+            }
         }
+    }
 
   return c;
 }
@@ -90,12 +94,14 @@
   MArray<T> c (dim_vector (nra*nrb, nca*ncb), T ());
 
   for (octave_idx_type ja = 0; ja < dla; ja++)
-    for (octave_idx_type jb = 0; jb < ncb; jb++)
-      {
-        octave_quit ();
-        mx_inline_mul (nrb, &c.xelem (ja*nrb, ja*ncb + jb), a.dgelem (ja),
-                       b.data () + nrb*jb);
-      }
+    {
+      octave_quit ();
+      for (octave_idx_type jb = 0; jb < ncb; jb++)
+        {
+          mx_inline_mul (nrb, &c.xelem (ja*nrb, ja*ncb + jb), a.dgelem (ja),
+                         b.data () + nrb*jb);
+        }
+    }
 
   return c;
 }
@@ -111,22 +117,24 @@
   C.cidx (0) = 0;
 
   for (octave_idx_type Aj = 0; Aj < A.columns (); Aj++)
-    for (octave_idx_type Bj = 0; Bj < B.columns (); Bj++)
-      {
-        octave_quit ();
-        for (octave_idx_type Ai = A.cidx (Aj); Ai < A.cidx (Aj+1); Ai++)
-          {
-            octave_idx_type Ci = A.ridx (Ai) * B.rows ();
-            const T v = A.data (Ai);
+    {
+      octave_quit ();
+      for (octave_idx_type Bj = 0; Bj < B.columns (); Bj++)
+        {
+          for (octave_idx_type Ai = A.cidx (Aj); Ai < A.cidx (Aj+1); Ai++)
+            {
+              octave_idx_type Ci = A.ridx (Ai) * B.rows ();
+              const T v = A.data (Ai);
 
-            for (octave_idx_type Bi = B.cidx (Bj); Bi < B.cidx (Bj+1); Bi++)
-              {
-                C.data (idx) = v * B.data (Bi);
-                C.ridx (idx++) = Ci + B.ridx (Bi);
-              }
-          }
-        C.cidx (Aj * B.columns () + Bj + 1) = idx;
-      }
+              for (octave_idx_type Bi = B.cidx (Bj); Bi < B.cidx (Bj+1); Bi++)
+                {
+                  C.data (idx) = v * B.data (Bi);
+                  C.ridx (idx++) = Ci + B.ridx (Bi);
+                }
+            }
+          C.cidx (Aj * B.columns () + Bj + 1) = idx;
+        }
+    }
 
   return C;
 }