# HG changeset patch # User dbateman # Date 1136412218 0 # Node ID b4cb3f93c1e10d9d7b3034559390e3f056dcf1fb # Parent d37b96139376513997b3c214b76b3d08138c69c9 [project @ 2006-01-04 22:03:38 by dbateman] diff -r d37b96139376 -r b4cb3f93c1e1 liboctave/CSparse.cc --- a/liboctave/CSparse.cc Wed Dec 28 20:16:50 2005 +0000 +++ b/liboctave/CSparse.cc Wed Jan 04 22:03:38 2006 +0000 @@ -44,6 +44,8 @@ #include "sparse-util.h" #include "SparseCmplxCHOL.h" +#include "oct-sort.h" + // Fortran functions we call. extern "C" { diff -r d37b96139376 -r b4cb3f93c1e1 liboctave/ChangeLog --- a/liboctave/ChangeLog Wed Dec 28 20:16:50 2005 +0000 +++ b/liboctave/ChangeLog Wed Jan 04 22:03:38 2006 +0000 @@ -1,7 +1,18 @@ +2006-01-04 David Bateman + + * Spars-op-defs.h (SPARSE_SPARSE_MUL): Previous change resulted in + elements not being sorted in return matrix. Sort them, and make + solver select between two algorithms to further improve the + performance. + * dSparse.cc: include oct-sort.h. + * CSparse.cc: ditto. + * sparse-sort.cc: Instantiate octave_sort. + 2005-12-28 David Bateman - * Sparse-op-defs.h (SPARSE_SPARSE_MUL): Improved algorithm that is faster in - all cases, and significantly so for low density or small oder problems. + * Sparse-op-defs.h (SPARSE_SPARSE_MUL): Improved algorithm that is + faster in all cases, and significantly so for low density or small + order problems. 2005-11-30 John W. Eaton diff -r d37b96139376 -r b4cb3f93c1e1 liboctave/Sparse-op-defs.h --- a/liboctave/Sparse-op-defs.h Wed Dec 28 20:16:50 2005 +0000 +++ b/liboctave/Sparse-op-defs.h Wed Jan 04 22:03:38 2006 +0000 @@ -1560,12 +1560,12 @@ octave_idx_type col = a.ridx(j); \ for (octave_idx_type k = m.cidx(col) ; k < m.cidx(col+1); k++) \ { \ - OCTAVE_QUIT; \ if (w[m.ridx(k)] < i + 1) \ { \ w[m.ridx(k)] = i + 1; \ nel++; \ } \ + OCTAVE_QUIT; \ } \ } \ } \ @@ -1580,35 +1580,85 @@ OCTAVE_LOCAL_BUFFER (EL_TYPE, Xcol, nr); \ \ RET_TYPE retval (nr, a_nc, nel); \ - \ octave_idx_type ii = 0; \ - \ - retval.xcidx(0) = 0; \ - for (octave_idx_type i = 0; i < a_nc ; i++) \ + /* The optimal break-point as estimated from simulations */ \ + /* Note that Mergesort is O(nz log(nz)) while searching all */ \ + /* values is O(nr), where nz here is non-zero per row of */ \ + /* length nr. The test itself was then derived from the */ \ + /* simulation with random square matrices and the observation */ \ + /* of the number of non-zero elements in the output matrix */ \ + /* it was found that the breakpoints were */ \ + /* nr: 500 1000 2000 5000 10000 */ \ + /* nz: 6 25 97 585 2202 */ \ + /* The below is a simplication of the 'polyfit'-ed parameters */ \ + /* to these breakpoints */ \ + if (nr > 43000 || ((nr * nr) * double(a_nc)) / 43000 > nel) \ { \ - for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ + octave_idx_type *ri = retval.xridx(); \ + octave_sort sort; \ + \ + retval.xcidx(0) = 0; \ + for (octave_idx_type i = 0; i < a_nc ; i++) \ { \ - octave_idx_type col = a.ridx(j); \ - EL_TYPE tmpval = a.data(j); \ - for (octave_idx_type k = m.cidx(col) ; k < m.cidx(col+1); k++) \ + for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ { \ - OCTAVE_QUIT; \ - octave_idx_type row = m.ridx(k); \ - if (w[row] < i + 1) \ - { \ - w[row] = i + 1; \ - retval.xridx(ii++) = row; \ - Xcol[row] = tmpval * m.data(k); \ - } \ - else \ - Xcol[row] += tmpval * m.data(k); \ + octave_idx_type col = a.ridx(j); \ + EL_TYPE tmpval = a.data(j); \ + for (octave_idx_type k = m.cidx(col) ; \ + k < m.cidx(col+1); k++) \ + { \ + OCTAVE_QUIT; \ + octave_idx_type row = m.ridx(k); \ + if (w[row] < i + 1) \ + { \ + w[row] = i + 1; \ + retval.xridx(ii++) = row;\ + Xcol[row] = tmpval * m.data(k); \ + } \ + else \ + Xcol[row] += tmpval * m.data(k); \ + } \ } \ - } \ - retval.xcidx(i+1) = ii; \ - for (octave_idx_type k = retval.xcidx(i); k < retval.xcidx(i+1); k++) \ - retval.xdata(k) = Xcol[retval.xridx(k)]; \ + sort.sort (ri + retval.xcidx(i), ii - retval.xcidx(i)); \ + for (octave_idx_type k = retval.xcidx(i); k < ii; k++) \ + retval.xdata(k) = Xcol[retval.xridx(k)]; \ + retval.xcidx(i+1) = ii; \ + } \ + retval.maybe_compress (true);\ + } \ + else \ + { \ + retval.xcidx(0) = 0; \ + for (octave_idx_type i = 0; i < a_nc ; i++) \ + { \ + for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ + { \ + octave_idx_type col = a.ridx(j); \ + EL_TYPE tmpval = a.data(j); \ + for (octave_idx_type k = m.cidx(col) ; \ + k < m.cidx(col+1); k++) \ + { \ + OCTAVE_QUIT; \ + octave_idx_type row = m.ridx(k); \ + if (w[row] < i + 1) \ + { \ + w[row] = i + 1; \ + Xcol[row] = tmpval * m.data(k); \ + } \ + else \ + Xcol[row] += tmpval * m.data(k); \ + } \ + } \ + for (octave_idx_type k = 0; k < nr; k++) \ + if (w[k] == i + 1 && Xcol[k] != 0.) \ + { \ + retval.xdata(ii) = Xcol[k]; \ + retval.xridx(ii++) = k; \ + } \ + retval.xcidx(i+1) = ii; \ + } \ + retval.maybe_compress ();\ } \ - retval.maybe_compress (true); \ return retval; \ } \ } diff -r d37b96139376 -r b4cb3f93c1e1 liboctave/dSparse.cc --- a/liboctave/dSparse.cc Wed Dec 28 20:16:50 2005 +0000 +++ b/liboctave/dSparse.cc Wed Jan 04 22:03:38 2006 +0000 @@ -45,6 +45,8 @@ #include "sparse-util.h" #include "SparsedbleCHOL.h" +#include "oct-sort.h" + // Fortran functions we call. extern "C" { diff -r d37b96139376 -r b4cb3f93c1e1 liboctave/sparse-sort.cc --- a/liboctave/sparse-sort.cc Wed Dec 28 20:16:50 2005 +0000 +++ b/liboctave/sparse-sort.cc Wed Jan 04 22:03:38 2006 +0000 @@ -50,6 +50,9 @@ // Instantiate the sparse sorting class template class octave_sort; +// Instantiate the sorting class of octave_idx_type, need in MUL macro +template class octave_sort; + /* ;;; Local Variables: *** ;;; mode: C++ ***