# HG changeset patch # User dbateman # Date 1147122226 0 # Node ID dd0422e4022cc465224a1f2b3448564295de4355 # Parent 1138ced03f14787c63846062ef7aab30ebc87bb2 [project @ 2006-05-08 21:03:46 by dbateman] diff -r 1138ced03f14 -r dd0422e4022c liboctave/ChangeLog --- a/liboctave/ChangeLog Mon May 08 20:23:07 2006 +0000 +++ b/liboctave/ChangeLog Mon May 08 21:03:46 2006 +0000 @@ -1,3 +1,9 @@ +2006-05-08 David Bateman + + * Sparse-op-defs.h (SPARSE_SPARSE_MUL): Set column pointers in + first pass and use to determine which algorithm to use on a + column-by-column basis. + 2006-05-04 David Bateman * SparseQR.cc, SparseQR.h, SparseCmplxQR.cc, SparseCmplxQR.h, diff -r 1138ced03f14 -r dd0422e4022c liboctave/Sparse-op-defs.h --- a/liboctave/Sparse-op-defs.h Mon May 08 20:23:07 2006 +0000 +++ b/liboctave/Sparse-op-defs.h Mon May 08 21:03:46 2006 +0000 @@ -1548,8 +1548,10 @@ else \ { \ OCTAVE_LOCAL_BUFFER (octave_idx_type, w, nr); \ + RET_TYPE retval (nr, a_nc, 0); \ for (octave_idx_type i = 0; i < nr; i++) \ w[i] = 0; \ + retval.xcidx(0) = 0; \ \ octave_idx_type nel = 0; \ \ @@ -1568,6 +1570,7 @@ OCTAVE_QUIT; \ } \ } \ + retval.xcidx(i+1) = nel; \ } \ \ if (nel == 0) \ @@ -1579,8 +1582,7 @@ \ OCTAVE_LOCAL_BUFFER (RET_EL_TYPE, Xcol, nr); \ \ - RET_TYPE retval (nr, a_nc, nel); \ - octave_idx_type ii = 0; \ + retval.change_capacity (nel); \ /* The optimal break-point as estimated from simulations */ \ /* Note that Mergesort is O(nz log(nz)) while searching all */ \ /* values is O(nr), where nz here is non-zero per row of */ \ @@ -1592,44 +1594,15 @@ /* nz: 6 25 97 585 2202 */ \ /* The below is a simplication of the 'polyfit'-ed parameters */ \ /* to these breakpoints */ \ - if (nr > 43000 || ((nr * nr) * double(a_nc)) / 43000 > nel) \ + octave_idx_type n_per_col = (a_nc > 43000 ? 43000 : \ + (a_nc * a_nc) / 43000); \ + octave_idx_type ii = 0; \ + octave_idx_type *ri = retval.xridx(); \ + octave_sort sort; \ + \ + for (octave_idx_type i = 0; i < a_nc ; i++) \ { \ - octave_idx_type *ri = retval.xridx(); \ - octave_sort sort; \ - \ - retval.xcidx(0) = 0; \ - for (octave_idx_type i = 0; i < a_nc ; i++) \ - { \ - for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ - { \ - octave_idx_type col = a.ridx(j); \ - EL_TYPE tmpval = a.data(j); \ - for (octave_idx_type k = m.cidx(col) ; \ - k < m.cidx(col+1); k++) \ - { \ - OCTAVE_QUIT; \ - octave_idx_type row = m.ridx(k); \ - if (w[row] < i + 1) \ - { \ - w[row] = i + 1; \ - retval.xridx(ii++) = row;\ - Xcol[row] = tmpval * m.data(k); \ - } \ - else \ - Xcol[row] += tmpval * m.data(k); \ - } \ - } \ - sort.sort (ri + retval.xcidx(i), ii - retval.xcidx(i)); \ - for (octave_idx_type k = retval.xcidx(i); k < ii; k++) \ - retval.xdata(k) = Xcol[retval.xridx(k)]; \ - retval.xcidx(i+1) = ii; \ - } \ - retval.maybe_compress (true);\ - } \ - else \ - { \ - retval.xcidx(0) = 0; \ - for (octave_idx_type i = 0; i < a_nc ; i++) \ + if (retval.xcidx(i+1) - retval.xcidx(i) > n_per_col) \ { \ for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ { \ @@ -1655,10 +1628,34 @@ retval.xdata(ii) = Xcol[k]; \ retval.xridx(ii++) = k; \ } \ - retval.xcidx(i+1) = ii; \ + } \ + else \ + { \ + for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ + { \ + octave_idx_type col = a.ridx(j); \ + EL_TYPE tmpval = a.data(j); \ + for (octave_idx_type k = m.cidx(col) ; \ + k < m.cidx(col+1); k++) \ + { \ + OCTAVE_QUIT; \ + octave_idx_type row = m.ridx(k); \ + if (w[row] < i + 1) \ + { \ + w[row] = i + 1; \ + retval.xridx(ii++) = row;\ + Xcol[row] = tmpval * m.data(k); \ + } \ + else \ + Xcol[row] += tmpval * m.data(k); \ + } \ + } \ + sort.sort (ri + retval.xcidx(i), ii - retval.xcidx(i)); \ + for (octave_idx_type k = retval.xcidx(i); k < ii; k++) \ + retval.xdata(k) = Xcol[retval.xridx(k)]; \ } \ - retval.maybe_compress ();\ } \ + retval.maybe_compress ();\ return retval; \ } \ }