Mercurial > octave
changeset 25485:38a07d930ccd
Improve kron performance by 17% by making fewer calls to octave_quit() (bug #54005).
* kron.cc: Move octave_quit() from the innermost of the triple "for" loops to
the outermost loop. Ctrl+C responsiveness is not impacted, since humans expect
results in hundreds of milliseconds.
author | Rik <rik@octave.org> |
---|---|
date | Wed, 20 Jun 2018 10:51:45 -0700 |
parents | b7db401e1a99 |
children | dffd9f6ee85c |
files | libinterp/corefcn/kron.cc |
diffstat | 1 files changed, 34 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/kron.cc Tue Jun 19 09:18:44 2018 -0700
+++ b/libinterp/corefcn/kron.cc Wed Jun 20 10:51:45 2018 -0700
@@ -64,13 +64,17 @@
   T *cv = c.fortran_vec ();

   for (octave_idx_type ja = 0; ja < nca; ja++)
-    for (octave_idx_type jb = 0; jb < ncb; jb++)
-      for (octave_idx_type ia = 0; ia < nra; ia++)
+    {
+      octave_quit ();
+      for (octave_idx_type jb = 0; jb < ncb; jb++)
         {
-          octave_quit ();
-          mx_inline_mul (nrb, cv, a(ia, ja), b.data () + nrb*jb);
-          cv += nrb;
+          for (octave_idx_type ia = 0; ia < nra; ia++)
+            {
+              mx_inline_mul (nrb, cv, a(ia, ja), b.data () + nrb*jb);
+              cv += nrb;
+            }
         }
+    }

   return c;
 }
@@ -90,12 +94,14 @@
   MArray<T> c (dim_vector (nra*nrb, nca*ncb), T ());

   for (octave_idx_type ja = 0; ja < dla; ja++)
-    for (octave_idx_type jb = 0; jb < ncb; jb++)
-      {
-        octave_quit ();
-        mx_inline_mul (nrb, &c.xelem (ja*nrb, ja*ncb + jb), a.dgelem (ja),
-                       b.data () + nrb*jb);
-      }
+    {
+      octave_quit ();
+      for (octave_idx_type jb = 0; jb < ncb; jb++)
+        {
+          mx_inline_mul (nrb, &c.xelem (ja*nrb, ja*ncb + jb), a.dgelem (ja),
+                         b.data () + nrb*jb);
+        }
+    }

   return c;
 }
@@ -111,22 +117,24 @@
   C.cidx (0) = 0;

   for (octave_idx_type Aj = 0; Aj < A.columns (); Aj++)
-    for (octave_idx_type Bj = 0; Bj < B.columns (); Bj++)
-      {
-        octave_quit ();
-        for (octave_idx_type Ai = A.cidx (Aj); Ai < A.cidx (Aj+1); Ai++)
-          {
-            octave_idx_type Ci = A.ridx (Ai) * B.rows ();
-            const T v = A.data (Ai);
+    {
+      octave_quit ();
+      for (octave_idx_type Bj = 0; Bj < B.columns (); Bj++)
+        {
+          for (octave_idx_type Ai = A.cidx (Aj); Ai < A.cidx (Aj+1); Ai++)
+            {
+              octave_idx_type Ci = A.ridx (Ai) * B.rows ();
+              const T v = A.data (Ai);

-            for (octave_idx_type Bi = B.cidx (Bj); Bi < B.cidx (Bj+1); Bi++)
-              {
-                C.data (idx) = v * B.data (Bi);
-                C.ridx (idx++) = Ci + B.ridx (Bi);
-              }
-          }
-        C.cidx (Aj * B.columns () + Bj + 1) = idx;
-      }
+              for (octave_idx_type Bi = B.cidx (Bj); Bi < B.cidx (Bj+1); Bi++)
+                {
+                  C.data (idx) = v * B.data (Bi);
+                  C.ridx (idx++) = Ci + B.ridx (Bi);
+                }
+            }
+          C.cidx (Aj * B.columns () + Bj + 1) = idx;
+        }
+    }

   return C;
 }