# HG changeset patch # User John W. Eaton # Date 1454449981 18000 # Node ID a10f60e13243c231e07ae2970bbe4781331cca56 # Parent 791dcb32b657722923a0fa9d0466ca6de8e3626e style fixes in liboctave/numeric directory * base-qr.h, randgamma.h, randmtzig.h, randpoisson.h, sparse-chol.cc, sparse-chol.h, sparse-dmsolve.cc, sparse-lu.cc, sparse-lu.h, sparse-qr.cc, sparse-qr.h, oct-sparse.h: Style fixes. diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/base-qr.h --- a/liboctave/numeric/base-qr.h Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/base-qr.h Tue Feb 02 16:53:01 2016 -0500 @@ -55,6 +55,7 @@ q = a.q; r = a.r; } + return *this; } @@ -74,8 +75,8 @@ qr_type r; }; -#ifndef HAVE_QRUPDATE -void warn_qrupdate_once (void); +#if ! defined (HAVE_QRUPDATE) +extern void warn_qrupdate_once (void); #endif #endif diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/randgamma.h --- a/liboctave/numeric/randgamma.h Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/randgamma.h Tue Feb 02 16:53:01 2016 -0500 @@ -23,7 +23,8 @@ /* Original version written by Paul Kienzle distributed as free software in the in the public domain. */ -#ifndef _RANDGAMMA_H +#if ! defined (octave_randgamma_h) +#define octave_radgamma_h 1 #ifdef __cplusplus extern "C" { @@ -39,4 +40,5 @@ #ifdef __cplusplus } #endif + #endif diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/randmtzig.h --- a/liboctave/numeric/randmtzig.h Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/randmtzig.h Tue Feb 02 16:53:01 2016 -0500 @@ -61,8 +61,8 @@ */ -#ifndef _RANDMTZIG_H -#define _RANDMTZIG_H +#if !defined (octave_randmtzig_h) +#define octave__randmtzig_h 1 #define MT_N 624 @@ -70,14 +70,14 @@ extern "C" { #endif -/* === Mersenne Twister === */ +/* Mersenne Twister. */ extern OCTAVE_API void oct_init_by_int (uint32_t s); -extern OCTAVE_API void oct_init_by_array (uint32_t init_key[], int key_length); +extern OCTAVE_API void oct_init_by_array (uint32_t *init_key, int key_length); extern OCTAVE_API void oct_init_by_entropy (void); -extern OCTAVE_API void oct_set_state (uint32_t save[]); -extern OCTAVE_API void oct_get_state (uint32_t save[]); +extern OCTAVE_API void oct_set_state (uint32_t *save); +extern OCTAVE_API void oct_get_state (uint32_t *save); -/* === Array generators === */ +/* Array generators. */ extern OCTAVE_API double oct_randu (void); extern OCTAVE_API double oct_randn (void); extern OCTAVE_API double oct_rande (void); @@ -86,7 +86,7 @@ extern OCTAVE_API float oct_float_randn (void); extern OCTAVE_API float oct_float_rande (void); -/* === Array generators === */ +/* Array generators. */ extern OCTAVE_API void oct_fill_randu (octave_idx_type n, double *p); extern OCTAVE_API void oct_fill_randn (octave_idx_type n, double *p); extern OCTAVE_API void oct_fill_rande (octave_idx_type n, double *p); @@ -98,4 +98,5 @@ #ifdef __cplusplus } #endif + #endif diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/randpoisson.h --- a/liboctave/numeric/randpoisson.h Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/randpoisson.h Tue Feb 02 16:53:01 2016 -0500 @@ -23,20 +23,27 @@ /* Original version written by Paul Kienzle distributed as free software in the in the public domain. */ -#ifndef _RANDPOISSON_H +#if ! defined (octave_randpoisson_h) +#define octave_randpoisson_h 1 #ifdef __cplusplus extern "C" { #endif -extern OCTAVE_API double oct_randp (double L); -extern OCTAVE_API void oct_fill_randp (double L, octave_idx_type n, double *p); +extern OCTAVE_API double +oct_randp (double L); + +extern OCTAVE_API void +oct_fill_randp (double L, octave_idx_type n, double *p); -extern OCTAVE_API float oct_float_randp (float L); -extern OCTAVE_API void oct_fill_float_randp (float L, octave_idx_type n, - float *p); +extern OCTAVE_API float +oct_float_randp (float L); + +extern OCTAVE_API void +oct_fill_float_randp (float L, octave_idx_type n, float *p); #ifdef __cplusplus } #endif + #endif diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/sparse-chol.cc --- a/liboctave/numeric/sparse-chol.cc Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/sparse-chol.cc Tue Feb 02 16:53:01 2016 -0500 @@ -1,5 +1,6 @@ /* +Copyright (C) 2016 John W. Eaton Copyright (C) 2005-2015 David Bateman Copyright (C) 1998-2005 Andy Adler @@ -137,28 +138,26 @@ void sparse_chol::sparse_chol_rep::drop_zeros (const cholmod_sparse *S) { - chol_elt sik; - octave_idx_type *Sp, *Si; - chol_elt *Sx; - octave_idx_type pdest, k, ncol, p, pend; - if (! S) return; - Sp = static_cast(S->p); - Si = static_cast(S->i); - Sx = static_cast(S->x); - pdest = 0; - ncol = S->ncol; + octave_idx_type *Sp = static_cast(S->p); + octave_idx_type *Si = static_cast(S->i); + chol_elt *Sx = static_cast(S->x); + + octave_idx_type pdest = 0; + octave_idx_type ncol = S->ncol; - for (k = 0; k < ncol; k++) + for (octave_idx_type k = 0; k < ncol; k++) { - p = Sp[k]; - pend = Sp[k+1]; + octave_idx_type p = Sp[k]; + octave_idx_type pend = Sp[k+1]; Sp[k] = pdest; + for (; p < pend; p++) { - sik = Sx[p]; + chol_elt sik = Sx[p]; + if (CHOLMOD_IS_NONZERO (sik)) { if (p != pdest) @@ -166,10 +165,12 @@ Si[pdest] = Si[p]; Sx[pdest] = sik; } + pdest++; } } } + Sp[ncol] = pdest; } @@ -202,6 +203,7 @@ volatile octave_idx_type info = 0; #ifdef HAVE_CHOLMOD + octave_idx_type a_nr = a.rows (); octave_idx_type a_nc = a.cols (); @@ -211,10 +213,12 @@ cholmod_common *cm = &Common; // Setup initial parameters + CHOLMOD_NAME(start) (cm); cm->prefer_zomplex = false; double spu = octave_sparse_params::get_key ("spumoni"); + if (spu == 0.) { cm->print = -1; @@ -227,6 +231,7 @@ } cm->error_handler = &SparseCholError; + SUITESPARSE_ASSIGN_FPTR2 (divcomplex_func, cm->complex_divide, divcomplex); SUITESPARSE_ASSIGN_FPTR2 (hypot_func, cm->hypotenuse, hypot); @@ -240,6 +245,7 @@ cholmod_sparse A; cholmod_sparse *ac = &A; double dummy; + ac->nrow = a_nr; ac->ncol = a_nc; @@ -302,6 +308,7 @@ CHOLMOD_NAME(reallocate_sparse) (static_cast(Lsparse->p)[minor_p], Lsparse, cm); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + Lsparse->ncol = minor_p; } @@ -336,6 +343,7 @@ sparse_chol::sparse_chol_rep::Q (void) const { #ifdef HAVE_CHOLMOD + octave_idx_type n = Lsparse->nrow; SparseMatrix p (n, n, n); @@ -345,11 +353,15 @@ p.xridx (i) = static_cast(perms (i)); p.xdata (i) = 1; } + p.xcidx (n) = n; return p; + #else + return SparseMatrix (); + #endif } @@ -420,20 +432,29 @@ sparse_chol::L (void) const { #ifdef HAVE_CHOLMOD + cholmod_sparse *m = rep->L (); + octave_idx_type nc = m->ncol; octave_idx_type nnz = m->nzmax; + chol_type ret (m->nrow, nc, nnz); + for (octave_idx_type j = 0; j < nc+1; j++) ret.xcidx (j) = static_cast(m->p)[j]; + for (octave_idx_type i = 0; i < nnz; i++) { ret.xridx (i) = static_cast(m->i)[i]; ret.xdata (i) = static_cast(m->x)[i]; } + return ret; + #else + return chol_type (); + #endif } @@ -477,7 +498,9 @@ sparse_chol::inverse (void) const { chol_type retval; + #ifdef HAVE_CHOLMOD + cholmod_sparse *m = rep->L (); octave_idx_type n = m->ncol; ColumnVector perms = rep->perm (); @@ -489,11 +512,14 @@ if (perms.numel () == n) { SparseMatrix Qc = Q (); + retval = Qc * linv * linv.hermitian () * Qc.transpose (); } else retval = linv * linv.hermitian (); + #endif + return retval; } diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/sparse-chol.h --- a/liboctave/numeric/sparse-chol.h Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/sparse-chol.h Tue Feb 02 16:53:01 2016 -0500 @@ -1,5 +1,6 @@ /* +Copyright (C) 2016 John W. Eaton Copyright (C) 2005-2015 David Bateman Copyright (C) 1998-2005 Andy Adler @@ -29,6 +30,11 @@ #include "dSparse.h" #include "oct-sparse.h" +// If the sparse matrix classes become templated on the element type +// (i.e., sparse_matrix), then it might be best to make the +// template parameter of this class also be the element type instead +// of the matrix type. + template class sparse_chol diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/sparse-dmsolve.cc --- a/liboctave/numeric/sparse-dmsolve.cc Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/sparse-dmsolve.cc Tue Feb 02 16:53:01 2016 -0500 @@ -1,5 +1,6 @@ /* +Copyright (C) 2016 John W. Eaton Copyright (C) 2006-2015 David Bateman This file is part of Octave. @@ -44,7 +45,9 @@ { octave_idx_type nr = rend - rst; octave_idx_type nc = cend - cst; + maxnz = (maxnz < 0 ? A.nnz () : maxnz); + octave_idx_type nz; // Cast to uint64 to handle overflow in this multiplication @@ -54,21 +57,28 @@ nz = maxnz; MSparse B (nr, nc, (nz < maxnz ? nz : maxnz)); + // Some sparse functions can support lazy indexing (where elements // in the row are in no particular order), even though octave in // general can't. For those functions that can using it is a big // win here in terms of speed. + if (lazy) { nz = 0; + for (octave_idx_type j = cst ; j < cend ; j++) { octave_idx_type qq = (Q ? Q[j] : j); + B.xcidx (j - cst) = nz; + for (octave_idx_type p = A.cidx (qq) ; p < A.cidx (qq+1) ; p++) { octave_quit (); + octave_idx_type r = (Pinv ? Pinv[A.ridx (p)] : A.ridx (p)); + if (r >= rst && r < rend) { B.xdata (nz) = A.data (p); @@ -76,32 +86,43 @@ } } } + B.xcidx (cend - cst) = nz ; } else { OCTAVE_LOCAL_BUFFER (T, X, rend - rst); + octave_sort sort; octave_idx_type *ri = B.xridx (); + nz = 0; + for (octave_idx_type j = cst ; j < cend ; j++) { octave_idx_type qq = (Q ? Q[j] : j); + B.xcidx (j - cst) = nz; + for (octave_idx_type p = A.cidx (qq) ; p < A.cidx (qq+1) ; p++) { octave_quit (); + octave_idx_type r = (Pinv ? Pinv[A.ridx (p)] : A.ridx (p)); + if (r >= rst && r < rend) { X[r-rst] = A.data (p); B.xridx (nz++) = r - rst ; } } + sort.sort (ri + B.xcidx (j - cst), nz - B.xcidx (j - cst)); + for (octave_idx_type p = B.cidx (j - cst); p < nz; p++) B.xdata (p) = X[B.xridx (p)]; } + B.xcidx (cend - cst) = nz ; } @@ -117,8 +138,12 @@ { r2 -= 1; c2 -= 1; - if (r1 > r2) { std::swap (r1, r2); } - if (c1 > c2) { std::swap (c1, c2); } + + if (r1 > r2) + std::swap (r1, r2); + + if (c1 > c2) + std::swap (c1, c2); octave_idx_type new_r = r2 - r1 + 1; octave_idx_type new_c = c2 - c1 + 1; @@ -126,8 +151,10 @@ MArray result (dim_vector (new_r, new_c)); for (octave_idx_type j = 0; j < new_c; j++) - for (octave_idx_type i = 0; i < new_r; i++) - result.xelem (i, j) = m.elem (r1+i, c1+j); + { + for (octave_idx_type i = 0; i < new_r; i++) + result.xelem (i, j) = m.elem (r1+i, c1+j); + } return result; } @@ -138,14 +165,19 @@ octave_idx_type r, octave_idx_type c) { T *ax = a.fortran_vec (); + const T *bx = b.fortran_vec (); + octave_idx_type anr = a.rows (); + octave_idx_type nr = b.rows (); octave_idx_type nc = b.cols (); + for (octave_idx_type j = 0; j < nc; j++) { octave_idx_type aoff = (c + j) * anr; octave_idx_type boff = j * nr; + for (octave_idx_type i = 0; i < nr; i++) { octave_quit (); @@ -161,10 +193,12 @@ { octave_idx_type b_rows = b.rows (); octave_idx_type b_cols = b.cols (); + octave_idx_type nr = a.rows (); octave_idx_type nc = a.cols (); OCTAVE_LOCAL_BUFFER (octave_idx_type, Qinv, nr); + for (octave_idx_type i = 0; i < nr; i++) Qinv[Q[i]] = i; @@ -175,14 +209,22 @@ nel += a.xcidx (nc) - a.xcidx (c + b_cols); for (octave_idx_type i = c; i < c + b_cols; i++) - for (octave_idx_type j = a.xcidx (i); j < a.xcidx (i+1); j++) - if (Qinv[a.xridx (j)] < r || Qinv[a.xridx (j)] >= r + b_rows) - nel++; + { + for (octave_idx_type j = a.xcidx (i); j < a.xcidx (i+1); j++) + { + if (Qinv[a.xridx (j)] < r || Qinv[a.xridx (j)] >= r + b_rows) + nel++; + } + } OCTAVE_LOCAL_BUFFER (T, X, nr); + octave_sort sort; + MSparse tmp (a); + a = MSparse (nr, nc, nel); + octave_idx_type *ri = a.xridx (); for (octave_idx_type i = 0; i < tmp.cidx (c); i++) @@ -190,6 +232,7 @@ a.xdata (i) = tmp.xdata (i); a.xridx (i) = tmp.xridx (i); } + for (octave_idx_type i = 0; i < c + 1; i++) a.xcidx (i) = tmp.xcidx (i); @@ -200,11 +243,13 @@ octave_quit (); for (octave_idx_type j = tmp.xcidx (i); j < tmp.xcidx (i+1); j++) - if (Qinv[tmp.xridx (j)] < r || Qinv[tmp.xridx (j)] >= r + b_rows) - { - X[tmp.xridx (j)] = tmp.xdata (j); - a.xridx (ii++) = tmp.xridx (j); - } + { + if (Qinv[tmp.xridx (j)] < r || Qinv[tmp.xridx (j)] >= r + b_rows) + { + X[tmp.xridx (j)] = tmp.xdata (j); + a.xridx (ii++) = tmp.xridx (j); + } + } octave_quit (); @@ -215,8 +260,10 @@ } sort.sort (ri + a.xcidx (i), ii - a.xcidx (i)); + for (octave_idx_type p = a.xcidx (i); p < ii; p++) a.xdata (p) = X[a.xridx (p)]; + a.xcidx (i+1) = ii; } @@ -227,6 +274,7 @@ a.xdata (ii) = tmp.xdata (j); a.xridx (ii++) = tmp.xridx (j); } + a.xcidx (i+1) = ii; } } @@ -237,9 +285,13 @@ { octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); + const T *Bx = b.fortran_vec (); + a.resize (dim_vector (b_nr, b_nc)); + RT *Btx = a.fortran_vec (); + for (octave_idx_type j = 0; j < b_nc; j++) { octave_idx_type off = j * b_nr; @@ -258,12 +310,17 @@ octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); octave_idx_type b_nz = b.nnz (); + octave_idx_type nz = 0; + a = MSparse (b_nr, b_nc, b_nz); octave_sort sort; octave_idx_type *ri = a.xridx (); + OCTAVE_LOCAL_BUFFER (RT, X, b_nr); + a.xcidx (0) = 0; + for (octave_idx_type j = 0; j < b_nc; j++) { for (octave_idx_type i = b.cidx (j); i < b.cidx (j+1); i++) @@ -273,12 +330,15 @@ X[r] = b.data (i); a.xridx (nz++) = p[b.ridx (i)]; } + sort.sort (ri + a.xcidx (j), nz - a.xcidx (j)); + for (octave_idx_type i = a.cidx (j); i < nz; i++) { octave_quit (); a.xdata (i) = X[a.xridx (i)]; } + a.xcidx (j+1) = nz; } } @@ -295,10 +355,13 @@ dmsolve (const ST &a, const T &b, octave_idx_type &info) { #ifdef HAVE_CXSPARSE + octave_idx_type nr = a.rows (); octave_idx_type nc = a.cols (); + octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); + RT retval; if (nr < 0 || nc < 0 || nr != b_nr) @@ -310,12 +373,15 @@ else { octave_idx_type nnz_remaining = a.nnz (); + CXSPARSE_DNAME () csm; + csm.m = nr; csm.n = nc; csm.x = 0; csm.nz = -1; csm.nzmax = a.nnz (); + // Cast away const on A, with full knowledge that CSparse won't touch it. // Prevents the methods below making a copy of the data. csm.p = const_cast(a.cidx ()); @@ -326,11 +392,14 @@ octave_idx_type *q = dm->q; OCTAVE_LOCAL_BUFFER (octave_idx_type, pinv, nr); + for (octave_idx_type i = 0; i < nr; i++) pinv[p[i]] = i; + RT btmp; dmsolve_permute (btmp, b, pinv); info = 0; + retval.resize (nc, b_nc); // Leading over-determined block @@ -339,17 +408,16 @@ ST m = dmsolve_extract (a, pinv, q, dm->rr[2], nr, dm->cc[3], nc, nnz_remaining, true); nnz_remaining -= m.nnz (); - RT mtmp = - qrsolve (m, dmsolve_extract (btmp, 0, 0, dm->rr[2], b_nr, 0, - b_nc), info); + RT mtmp = qrsolve (m, dmsolve_extract (btmp, 0, 0, dm->rr[2], + b_nr, 0, b_nc), info); dmsolve_insert (retval, mtmp, q, dm->cc[3], 0); + if (dm->rr[2] > 0 && ! info) { m = dmsolve_extract (a, pinv, q, 0, dm->rr[2], dm->cc[3], nc, nnz_remaining, true); nnz_remaining -= m.nnz (); - RT ctmp = dmsolve_extract (btmp, 0, 0, 0, - dm->rr[2], 0, b_nc); + RT ctmp = dmsolve_extract (btmp, 0, 0, 0, dm->rr[2], 0, b_nc); btmp.insert (ctmp - m * mtmp, 0, 0); } } @@ -379,8 +447,7 @@ m = dmsolve_extract (a, pinv, q, 0, dm->rr[1], dm->cc[2], dm->cc[3], nnz_remaining, true); nnz_remaining -= m.nnz (); - RT ctmp = dmsolve_extract (btmp, 0, 0, 0, - dm->rr[1], 0, b_nc); + RT ctmp = dmsolve_extract (btmp, 0, 0, 0, dm->rr[1], 0, b_nc); btmp.insert (ctmp - m * mtmp, 0, 0); } } @@ -390,18 +457,20 @@ { ST m = dmsolve_extract (a, pinv, q, 0, dm->rr[1], 0, dm->cc[2], nnz_remaining, true); - RT mtmp = - qrsolve (m, dmsolve_extract (btmp, 0, 0, 0, dm->rr[1] , 0, - b_nc), info); + RT mtmp = qrsolve (m, dmsolve_extract (btmp, 0, 0, 0, dm->rr[1], + 0, b_nc), info); dmsolve_insert (retval, mtmp, q, 0, 0); } CXSPARSE_DNAME (_dfree) (dm); } + return retval; #else + (*current_liboctave_error_handler) ("support for CXSparse was unavailable or disabled when liboctave was built"); + #endif } diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/sparse-lu.cc --- a/liboctave/numeric/sparse-lu.cc Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/sparse-lu.cc Tue Feb 02 16:53:01 2016 -0500 @@ -1,5 +1,6 @@ /* +Copyright (C) 2016 John W. Eaton Copyright (C) 2004-2015 David Bateman Copyright (C) 1998-2004 Andy Adler @@ -391,6 +392,7 @@ tmp = (piv_thres (0) > 1. ? 1. : piv_thres (0)); if (! xisnan (tmp)) Control (UMFPACK_PIVOT_TOLERANCE) = tmp; + tmp = (piv_thres (1) > 1. ? 1. : piv_thres (1)); if (! xisnan (tmp)) Control (UMFPACK_SYM_PIVOT_TOLERANCE) = tmp; @@ -546,8 +548,10 @@ } #else + (*current_liboctave_error_handler) ("support for UMFPACK was unavailable or disabled when liboctave was built"); + #endif } @@ -560,6 +564,7 @@ : Lfact (), Ufact (), Rfact (), cond (0), P (), Q () { #ifdef HAVE_UMFPACK + if (milu) (*current_liboctave_error_handler) ("Modified incomplete LU not implemented"); @@ -760,8 +765,10 @@ ("Option udiag of incomplete LU not implemented"); #else + (*current_liboctave_error_handler) ("support for UMFPACK was unavailable or disabled when liboctave was built"); + #endif } @@ -784,6 +791,7 @@ Yout.xridx (ii) = Ufact.ridx (i); Yout.xdata (ii++) = Ufact.data (i); } + if (j < nz) { // Note the +1 skips the 1.0 on the diagonal @@ -794,6 +802,7 @@ Yout.xdata (ii++) = Lfact.data (i); } } + Yout.xcidx (j + 1) = ii; } @@ -804,7 +813,6 @@ SparseMatrix sparse_lu::Pr (void) const { - octave_idx_type nr = Lfact.rows (); SparseMatrix Pout (nr, nr, nr); @@ -815,6 +823,7 @@ Pout.ridx (P (i)) = i; Pout.data (i) = 1; } + Pout.cidx (nr) = nr; return Pout; @@ -824,7 +833,6 @@ ColumnVector sparse_lu::Pr_vec (void) const { - octave_idx_type nr = Lfact.rows (); ColumnVector Pout (nr); @@ -856,6 +864,7 @@ Pout.ridx (i) = Q (i); Pout.data (i) = 1; } + Pout.cidx (nc) = nc; return Pout; @@ -865,7 +874,6 @@ ColumnVector sparse_lu::Pc_vec (void) const { - octave_idx_type nc = Ufact.cols (); ColumnVector Pout (nc); diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/sparse-lu.h --- a/liboctave/numeric/sparse-lu.h Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/sparse-lu.h Tue Feb 02 16:53:01 2016 -0500 @@ -1,5 +1,6 @@ /* +Copyright (C) 2016 John W. Eaton Copyright (C) 2004-2015 David Bateman Copyright (C) 1998-2004 Andy Adler @@ -28,6 +29,11 @@ #include "MArray.h" #include "dSparse.h" +// If the sparse matrix classes become templated on the element type +// (i.e., sparse_matrix), then it might be best to make the +// template parameter of this class also be the element type instead +// of the matrix type. + template class sparse_lu diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/sparse-qr.cc --- a/liboctave/numeric/sparse-qr.cc Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/sparse-qr.cc Tue Feb 02 16:53:01 2016 -0500 @@ -1,7 +1,7 @@ /* +Copyright (C) 2016 John W. Eaton Copyright (C) 2005-2015 David Bateman -Copyright (C) 2016 John W. Eaton This file is part of Octave. @@ -43,7 +43,7 @@ cxsparse_types { public: -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) typedef CXSPARSE_DNAME (s) symbolic_type; typedef CXSPARSE_DNAME (n) numeric_type; #endif @@ -54,7 +54,7 @@ cxsparse_types { public: -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) typedef CXSPARSE_ZNAME (s) symbolic_type; typedef CXSPARSE_ZNAME (n) numeric_type; #endif @@ -71,7 +71,7 @@ bool ok (void) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) return (N && S); #else return false; @@ -121,13 +121,19 @@ ColumnVector sparse_qr::sparse_qr_rep::Pinv (void) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) + ColumnVector ret (N->L->m); + for (octave_idx_type i = 0; i < N->L->m; i++) ret.xelem (i) = S->pinv[i]; + return ret; + #else + return ColumnVector (); + #endif } @@ -135,13 +141,19 @@ ColumnVector sparse_qr::sparse_qr_rep::P (void) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) + ColumnVector ret (N->L->m); + for (octave_idx_type i = 0; i < N->L->m; i++) ret.xelem (S->pinv[i]) = i; + return ret; + #else + return ColumnVector (); + #endif } @@ -153,12 +165,14 @@ sparse_qr::sparse_qr_rep::sparse_qr_rep (const SparseMatrix& a, int order) : count (1), nrows (a.rows ()), ncols (a.columns ()) -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) , S (0), N (0) #endif { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) + CXSPARSE_DNAME () A; + A.nzmax = a.nnz (); A.m = nrows; A.n = ncols; @@ -168,10 +182,12 @@ A.i = const_cast(a.ridx ()); A.x = const_cast(a.data ()); A.nz = -1; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; S = CXSPARSE_DNAME (_sqr) (order, &A, 1); N = CXSPARSE_DNAME (_qr) (&A, S); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + if (! N) (*current_liboctave_error_handler) ("sparse_qr: sparse matrix QR factorization filled"); @@ -179,15 +195,17 @@ count = 1; #else + (*current_liboctave_error_handler) ("sparse_qr: support for CXSparse was unavailable or disabled when liboctave was built"); + #endif } template <> sparse_qr::sparse_qr_rep::~sparse_qr_rep (void) { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) CXSPARSE_DNAME (_sfree) (S); CXSPARSE_DNAME (_nfree) (N); #endif @@ -197,9 +215,11 @@ SparseMatrix sparse_qr::sparse_qr_rep::V (void) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) + // Drop zeros from V and sort // FIXME: Is the double transpose to sort necessary? + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_dropzeros) (N->L); CXSPARSE_DNAME () *D = CXSPARSE_DNAME (_transpose) (N->L, 1); @@ -211,16 +231,22 @@ octave_idx_type nc = N->L->n; octave_idx_type nz = N->L->nzmax; SparseMatrix ret (N->L->m, nc, nz); + for (octave_idx_type j = 0; j < nc+1; j++) ret.xcidx (j) = N->L->p[j]; + for (octave_idx_type j = 0; j < nz; j++) { ret.xridx (j) = N->L->i[j]; ret.xdata (j) = N->L->x[j]; } + return ret; + #else + return SparseMatrix (); + #endif } @@ -228,9 +254,11 @@ SparseMatrix sparse_qr::sparse_qr_rep::R (bool econ) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) + // Drop zeros from R and sort // FIXME: Is the double transpose to sort necessary? + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_dropzeros) (N->U); CXSPARSE_DNAME () *D = CXSPARSE_DNAME (_transpose) (N->U, 1); @@ -246,14 +274,19 @@ for (octave_idx_type j = 0; j < nc+1; j++) ret.xcidx (j) = N->U->p[j]; + for (octave_idx_type j = 0; j < nz; j++) { ret.xridx (j) = N->U->i[j]; ret.xdata (j) = N->U->x[j]; } + return ret; + #else + return SparseMatrix (); + #endif } @@ -261,14 +294,19 @@ Matrix sparse_qr::sparse_qr_rep::C (const Matrix& b) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) + octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); + octave_idx_type nc = N->L->n; octave_idx_type nr = nrows; + const double *bvec = b.fortran_vec (); + Matrix ret (b_nr, b_nc); double *vec = ret.fortran_vec (); + if (nr < 0 || nc < 0 || nr != b_nr) (*current_liboctave_error_handler) ("matrix dimension mismatch"); @@ -277,29 +315,40 @@ else { OCTAVE_LOCAL_BUFFER (double, buf, S->m2); + for (volatile octave_idx_type j = 0, idx = 0; j < b_nc; j++, idx+=b_nr) { octave_quit (); + for (octave_idx_type i = nr; i < S->m2; i++) buf[i] = 0.; + volatile octave_idx_type nm = (nr < nc ? nr : nc); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_ipvec) (S->pinv, bvec + idx, buf, b_nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type i = 0; i < nm; i++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, i, N->B[i], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + for (octave_idx_type i = 0; i < b_nr; i++) vec[i+idx] = buf[i]; } } + return ret; + #else + return Matrix (); + #endif } @@ -307,11 +356,12 @@ Matrix sparse_qr::sparse_qr_rep::Q (void) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nc = N->L->n; octave_idx_type nr = nrows; Matrix ret (nr, nr); double *vec = ret.fortran_vec (); + if (nr < 0 || nc < 0) (*current_liboctave_error_handler) ("matrix dimension mismatch"); @@ -320,34 +370,48 @@ else { OCTAVE_LOCAL_BUFFER (double, bvec, nr + 1); + for (octave_idx_type i = 0; i < nr; i++) bvec[i] = 0.; + OCTAVE_LOCAL_BUFFER (double, buf, S->m2); + for (volatile octave_idx_type j = 0, idx = 0; j < nr; j++, idx+=nr) { octave_quit (); + bvec[j] = 1.0; for (octave_idx_type i = nr; i < S->m2; i++) buf[i] = 0.; + volatile octave_idx_type nm = (nr < nc ? nr : nc); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_ipvec) (S->pinv, bvec, buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type i = 0; i < nm; i++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, i, N->B[i], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + for (octave_idx_type i = 0; i < nr; i++) vec[i+idx] = buf[i]; + bvec[j] = 0.0; } } + return ret.transpose (); + #else + return Matrix (); + #endif } @@ -359,7 +423,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nr = nrows; octave_idx_type nc = ncols; @@ -371,23 +435,30 @@ Matrix x (nc, b_nc); double *vec = x.fortran_vec (); + OCTAVE_LOCAL_BUFFER (double, buf, S->m2); + for (volatile octave_idx_type i = 0, idx = 0, bidx = 0; i < b_nc; i++, idx+=nc, bidx+=b_nr) { octave_quit (); + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_ipvec) (S->pinv, bvec + bidx, buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_usolve) (N->U, buf); CXSPARSE_DNAME (_ipvec) (S->q, buf, vec + idx, nc); @@ -413,7 +484,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // These are swapped because the original matrix was transposed in // sparse_qr::solve. @@ -428,25 +499,33 @@ Matrix x (nc, b_nc); double *vec = x.fortran_vec (); + volatile octave_idx_type nbuf = (nc > S->m2 ? nc : S->m2); + OCTAVE_LOCAL_BUFFER (double, buf, nbuf); + for (volatile octave_idx_type i = 0, idx = 0, bidx = 0; i < b_nc; i++, idx+=nc, bidx+=b_nr) { octave_quit (); + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->q, bvec + bidx, buf, nr); CXSPARSE_DNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->pinv, buf, vec + idx, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; @@ -471,7 +550,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nr = nrows; octave_idx_type nc = ncols; @@ -479,31 +558,38 @@ octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); - volatile octave_idx_type ii, x_nz; - SparseMatrix x (nc, b_nc, b.nnz ()); x.xcidx (0) = 0; - x_nz = b.nnz (); - ii = 0; + + volatile octave_idx_type x_nz = b.nnz (); + volatile octave_idx_type ii = 0; + OCTAVE_LOCAL_BUFFER (double, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, buf, S->m2); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) Xx[j] = b.xelem (j,i); + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_ipvec) (S->pinv, Xx, buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_usolve) (N->U, buf); CXSPARSE_DNAME (_ipvec) (S->q, buf, Xx, nc); @@ -512,6 +598,7 @@ for (octave_idx_type j = 0; j < nc; j++) { double tmp = Xx[j]; + if (tmp != 0.0) { if (ii == x_nz) @@ -522,10 +609,12 @@ x.change_capacity (sz); x_nz = sz; } + x.xdata (ii) = tmp; x.xridx (ii++) = j; } } + x.xcidx (i+1) = ii; } @@ -548,7 +637,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // These are swapped because the original matrix was transposed in // sparse_qr::solve. @@ -559,33 +648,40 @@ octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); - volatile octave_idx_type ii, x_nz; - SparseMatrix x (nc, b_nc, b.nnz ()); x.xcidx (0) = 0; - x_nz = b.nnz (); - ii = 0; + + volatile octave_idx_type x_nz = b.nnz (); + volatile octave_idx_type ii = 0; volatile octave_idx_type nbuf = (nc > S->m2 ? nc : S->m2); + OCTAVE_LOCAL_BUFFER (double, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, buf, nbuf); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) Xx[j] = b.xelem (j,i); + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->q, Xx, buf, nr); CXSPARSE_DNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->pinv, buf, Xx, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; @@ -593,6 +689,7 @@ for (octave_idx_type j = 0; j < nc; j++) { double tmp = Xx[j]; + if (tmp != 0.0) { if (ii == x_nz) @@ -603,10 +700,12 @@ x.change_capacity (sz); x_nz = sz; } + x.xdata (ii) = tmp; x.xridx (ii++) = j; } } + x.xcidx (i+1) = ii; } @@ -631,7 +730,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nr = nrows; octave_idx_type nc = ncols; @@ -641,48 +740,62 @@ ComplexMatrix x (nc, b_nc); Complex *vec = x.fortran_vec (); + OCTAVE_LOCAL_BUFFER (double, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, Xz, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, buf, S->m2); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) { Complex c = b.xelem (j,i); Xx[j] = std::real (c); Xz[j] = std::imag (c); } + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_ipvec) (S->pinv, Xx, buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_usolve) (N->U, buf); CXSPARSE_DNAME (_ipvec) (S->q, buf, Xx, nc); + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = 0.; + CXSPARSE_DNAME (_ipvec) (S->pinv, Xz, buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_usolve) (N->U, buf); CXSPARSE_DNAME (_ipvec) (S->q, buf, Xz, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (octave_idx_type j = 0; j < nc; j++) vec[j+idx] = Complex (Xx[j], Xz[j]); } @@ -706,7 +819,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // These are swapped because the original matrix was transposed in // sparse_qr::solve. @@ -719,51 +832,66 @@ ComplexMatrix x (nc, b_nc); Complex *vec = x.fortran_vec (); + volatile octave_idx_type nbuf = (nc > S->m2 ? nc : S->m2); + OCTAVE_LOCAL_BUFFER (double, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, Xz, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, buf, nbuf); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) { Complex c = b.xelem (j,i); Xx[j] = std::real (c); Xz[j] = std::imag (c); } + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->q, Xx, buf, nr); CXSPARSE_DNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->pinv, buf, Xx, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->q, Xz, buf, nr); CXSPARSE_DNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->pinv, buf, Xz, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (octave_idx_type j = 0; j < nc; j++) vec[j+idx] = Complex (Xx[j], Xz[j]); } @@ -785,12 +913,14 @@ sparse_qr::sparse_qr_rep::sparse_qr_rep (const SparseComplexMatrix& a, int order) : count (1), nrows (a.rows ()), ncols (a.columns ()) -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) , S (0), N (0) #endif { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) + CXSPARSE_ZNAME () A; + A.nzmax = a.nnz (); A.m = nrows; A.n = ncols; @@ -800,10 +930,12 @@ A.i = const_cast(a.ridx ()); A.x = const_cast(reinterpret_cast (a.data ())); A.nz = -1; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; S = CXSPARSE_ZNAME (_sqr) (order, &A, 1); N = CXSPARSE_ZNAME (_qr) (&A, S); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + if (! N) (*current_liboctave_error_handler) ("sparse_qr: sparse matrix QR factorization filled"); @@ -811,15 +943,17 @@ count = 1; #else + (*current_liboctave_error_handler) ("sparse_qr: support for CXSparse was unavailable or disabled when liboctave was built"); + #endif } template <> sparse_qr::sparse_qr_rep::~sparse_qr_rep (void) { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) CXSPARSE_ZNAME (_sfree) (S); CXSPARSE_ZNAME (_nfree) (N); #endif @@ -829,9 +963,10 @@ SparseComplexMatrix sparse_qr::sparse_qr_rep::V (void) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // Drop zeros from V and sort // FIXME: Is the double transpose to sort necessary? + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_dropzeros) (N->L); CXSPARSE_ZNAME () *D = CXSPARSE_ZNAME (_transpose) (N->L, 1); @@ -843,16 +978,22 @@ octave_idx_type nc = N->L->n; octave_idx_type nz = N->L->nzmax; SparseComplexMatrix ret (N->L->m, nc, nz); + for (octave_idx_type j = 0; j < nc+1; j++) ret.xcidx (j) = N->L->p[j]; + for (octave_idx_type j = 0; j < nz; j++) { ret.xridx (j) = N->L->i[j]; ret.xdata (j) = reinterpret_cast(N->L->x)[j]; } + return ret; + #else + return SparseComplexMatrix (); + #endif } @@ -860,9 +1001,10 @@ SparseComplexMatrix sparse_qr::sparse_qr_rep::R (bool econ) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // Drop zeros from R and sort // FIXME: Is the double transpose to sort necessary? + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_dropzeros) (N->U); CXSPARSE_ZNAME () *D = CXSPARSE_ZNAME (_transpose) (N->U, 1); @@ -876,16 +1018,22 @@ SparseComplexMatrix ret ((econ ? (nc > nrows ? nrows : nc) : nrows), nc, nz); + for (octave_idx_type j = 0; j < nc+1; j++) ret.xcidx (j) = N->U->p[j]; + for (octave_idx_type j = 0; j < nz; j++) { ret.xridx (j) = N->U->i[j]; ret.xdata (j) = reinterpret_cast(N->U->x)[j]; } + return ret; + #else + return SparseComplexMatrix (); + #endif } @@ -893,7 +1041,7 @@ ComplexMatrix sparse_qr::sparse_qr_rep::C (const ComplexMatrix& b) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); octave_idx_type nc = N->L->n; @@ -901,35 +1049,47 @@ const cs_complex_t *bvec = reinterpret_cast(b.fortran_vec ()); ComplexMatrix ret (b_nr, b_nc); Complex *vec = ret.fortran_vec (); + if (nr < 0 || nc < 0 || nr != b_nr) (*current_liboctave_error_handler) ("matrix dimension mismatch"); + if (nr == 0 || nc == 0 || b_nc == 0) ret = ComplexMatrix (nc, b_nc, Complex (0.0, 0.0)); else { OCTAVE_LOCAL_BUFFER (Complex, buf, S->m2); + for (volatile octave_idx_type j = 0, idx = 0; j < b_nc; j++, idx+=b_nr) { octave_quit (); + volatile octave_idx_type nm = (nr < nc ? nr : nc); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_ipvec) (S->pinv, bvec + idx, reinterpret_cast(buf), b_nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type i = 0; i < nm; i++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, i, N->B[i], reinterpret_cast(buf)); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + for (octave_idx_type i = 0; i < b_nr; i++) vec[i+idx] = buf[i]; } } + return ret; + #else + return ComplexMatrix (); + #endif } @@ -937,11 +1097,12 @@ ComplexMatrix sparse_qr::sparse_qr_rep::Q (void) const { -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nc = N->L->n; octave_idx_type nr = nrows; ComplexMatrix ret (nr, nr); Complex *vec = ret.fortran_vec (); + if (nr < 0 || nc < 0) (*current_liboctave_error_handler) ("matrix dimension mismatch"); @@ -950,32 +1111,46 @@ else { OCTAVE_LOCAL_BUFFER (cs_complex_t, bvec, nr); + for (octave_idx_type i = 0; i < nr; i++) bvec[i] = cs_complex_t (0.0, 0.0); + OCTAVE_LOCAL_BUFFER (Complex, buf, S->m2); + for (volatile octave_idx_type j = 0, idx = 0; j < nr; j++, idx+=nr) { octave_quit (); + bvec[j] = cs_complex_t (1.0, 0.0); + volatile octave_idx_type nm = (nr < nc ? nr : nc); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_ipvec) (S->pinv, bvec, reinterpret_cast(buf), nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type i = 0; i < nm; i++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, i, N->B[i], reinterpret_cast(buf)); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + for (octave_idx_type i = 0; i < nr; i++) vec[i+idx] = buf[i]; + bvec[j] = cs_complex_t (0.0, 0.0); } } + return ret.hermitian (); + #else + return ComplexMatrix (); + #endif } @@ -987,7 +1162,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nr = nrows; octave_idx_type nc = ncols; @@ -995,52 +1170,64 @@ octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); - volatile octave_idx_type ii, x_nz; - SparseComplexMatrix x (nc, b_nc, b.nnz ()); x.xcidx (0) = 0; - x_nz = b.nnz (); - ii = 0; + + volatile octave_idx_type x_nz = b.nnz (); + volatile octave_idx_type ii = 0; + OCTAVE_LOCAL_BUFFER (double, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, Xz, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, buf, S->m2); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) { Complex c = b.xelem (j,i); Xx[j] = std::real (c); Xz[j] = std::imag (c); } + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_ipvec) (S->pinv, Xx, buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_usolve) (N->U, buf); CXSPARSE_DNAME (_ipvec) (S->q, buf, Xx, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_ipvec) (S->pinv, Xz, buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_usolve) (N->U, buf); CXSPARSE_DNAME (_ipvec) (S->q, buf, Xz, nc); @@ -1049,6 +1236,7 @@ for (octave_idx_type j = 0; j < nc; j++) { Complex tmp = Complex (Xx[j], Xz[j]); + if (tmp != 0.0) { if (ii == x_nz) @@ -1059,10 +1247,12 @@ x.change_capacity (sz); x_nz = sz; } + x.xdata (ii) = tmp; x.xridx (ii++) = j; } } + x.xcidx (i+1) = ii; } @@ -1085,7 +1275,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // These are swapped because the original matrix was transposed in // sparse_qr::solve. @@ -1096,54 +1286,66 @@ octave_idx_type b_nr = b.rows (); octave_idx_type b_nc = b.cols (); - volatile octave_idx_type ii, x_nz; - SparseComplexMatrix x (nc, b_nc, b.nnz ()); x.xcidx (0) = 0; - x_nz = b.nnz (); - ii = 0; + + volatile octave_idx_type x_nz = b.nnz (); + volatile octave_idx_type ii = 0; volatile octave_idx_type nbuf = (nc > S->m2 ? nc : S->m2); + OCTAVE_LOCAL_BUFFER (double, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, Xz, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (double, buf, nbuf); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) { Complex c = b.xelem (j,i); Xx[j] = std::real (c); Xz[j] = std::imag (c); } + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->q, Xx, buf, nr); CXSPARSE_DNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->pinv, buf, Xx, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = 0.; + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->q, Xz, buf, nr); CXSPARSE_DNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_DNAME (_pvec) (S->pinv, buf, Xz, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; @@ -1151,6 +1353,7 @@ for (octave_idx_type j = 0; j < nc; j++) { Complex tmp = Complex (Xx[j], Xz[j]); + if (tmp != 0.0) { if (ii == x_nz) @@ -1161,10 +1364,12 @@ x.change_capacity (sz); x_nz = sz; } + x.xdata (ii) = tmp; x.xridx (ii++) = j; } } + x.xcidx (i+1) = ii; } @@ -1189,7 +1394,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nr = nrows; octave_idx_type nc = ncols; @@ -1199,25 +1404,33 @@ ComplexMatrix x (nc, b_nc); cs_complex_t *vec = reinterpret_cast (x.fortran_vec ()); + OCTAVE_LOCAL_BUFFER (cs_complex_t, buf, S->m2); OCTAVE_LOCAL_BUFFER (Complex, Xx, b_nr); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) Xx[j] = b.xelem (j,i); + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = cs_complex_t (0.0, 0.0); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_ipvec) (S->pinv, reinterpret_cast(Xx), buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_usolve) (N->U, buf); CXSPARSE_ZNAME (_ipvec) (S->q, buf, vec + idx, nc); @@ -1243,7 +1456,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // These are swapped because the original matrix was transposed in // sparse_qr::solve. @@ -1258,29 +1471,38 @@ cs_complex_t *vec = reinterpret_cast (x.fortran_vec ()); volatile octave_idx_type nbuf = (nc > S->m2 ? nc : S->m2); + OCTAVE_LOCAL_BUFFER (cs_complex_t, buf, nbuf); OCTAVE_LOCAL_BUFFER (Complex, Xx, b_nr); OCTAVE_LOCAL_BUFFER (double, B, nr); + for (octave_idx_type i = 0; i < nr; i++) B[i] = N->B[i]; + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) Xx[j] = b.xelem (j,i); + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = cs_complex_t (0.0, 0.0); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_pvec) (S->q, reinterpret_cast(Xx), buf, nr); CXSPARSE_ZNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, j, B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_pvec) (S->pinv, buf, vec + idx, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; @@ -1305,7 +1527,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nr = nrows; octave_idx_type nc = ncols; @@ -1313,31 +1535,38 @@ octave_idx_type b_nc = b.cols (); octave_idx_type b_nr = b.rows (); - volatile octave_idx_type ii, x_nz; - SparseComplexMatrix x (nc, b_nc, b.nnz ()); x.xcidx (0) = 0; - x_nz = b.nnz (); - ii = 0; + + volatile octave_idx_type x_nz = b.nnz (); + volatile octave_idx_type ii = 0; + OCTAVE_LOCAL_BUFFER (Complex, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (cs_complex_t, buf, S->m2); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) Xx[j] = b.xelem (j,i); + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = cs_complex_t (0.0, 0.0); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_ipvec) (S->pinv, reinterpret_cast(Xx), buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_usolve) (N->U, buf); CXSPARSE_ZNAME (_ipvec) (S->q, buf, reinterpret_cast(Xx), nc); @@ -1346,6 +1575,7 @@ for (octave_idx_type j = 0; j < nc; j++) { Complex tmp = Xx[j]; + if (tmp != 0.0) { if (ii == x_nz) @@ -1356,10 +1586,12 @@ x.change_capacity (sz); x_nz = sz; } + x.xdata (ii) = tmp; x.xridx (ii++) = j; } } + x.xcidx (i+1) = ii; } @@ -1384,7 +1616,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // These are swapped because the original matrix was transposed in // sparse_qr::solve. @@ -1395,38 +1627,44 @@ octave_idx_type b_nc = b.cols (); octave_idx_type b_nr = b.rows (); - volatile octave_idx_type ii, x_nz; - SparseComplexMatrix x (nc, b_nc, b.nnz ()); x.xcidx (0) = 0; - x_nz = b.nnz (); - ii = 0; + + volatile octave_idx_type x_nz = b.nnz (); + volatile octave_idx_type ii = 0; volatile octave_idx_type nbuf = (nc > S->m2 ? nc : S->m2); + OCTAVE_LOCAL_BUFFER (Complex, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (cs_complex_t, buf, nbuf); - OCTAVE_LOCAL_BUFFER (double, B, nr); + for (octave_idx_type i = 0; i < nr; i++) B[i] = N->B[i]; for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) Xx[j] = b.xelem (j,i); + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = cs_complex_t (0.0, 0.0); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_pvec) (S->q, reinterpret_cast(Xx), buf, nr); CXSPARSE_ZNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, j, B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_pvec) (S->pinv, buf, reinterpret_cast(Xx), nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; @@ -1434,6 +1672,7 @@ for (octave_idx_type j = 0; j < nc; j++) { Complex tmp = Xx[j]; + if (tmp != 0.0) { if (ii == x_nz) @@ -1444,10 +1683,12 @@ x.change_capacity (sz); x_nz = sz; } + x.xdata (ii) = tmp; x.xridx (ii++) = j; } } + x.xcidx (i+1) = ii; } @@ -1472,7 +1713,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nr = nrows; octave_idx_type nc = ncols; @@ -1485,23 +1726,30 @@ ComplexMatrix x (nc, b_nc); cs_complex_t *vec = reinterpret_cast (x.fortran_vec ()); + OCTAVE_LOCAL_BUFFER (cs_complex_t, buf, S->m2); + for (volatile octave_idx_type i = 0, idx = 0, bidx = 0; i < b_nc; i++, idx+=nc, bidx+=b_nr) { octave_quit (); + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = cs_complex_t (0.0, 0.0); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_ipvec) (S->pinv, bvec + bidx, buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_usolve) (N->U, buf); CXSPARSE_ZNAME (_ipvec) (S->q, buf, vec + idx, nc); @@ -1527,7 +1775,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // These are swapped because the original matrix was transposed in // sparse_qr::solve. @@ -1542,32 +1790,42 @@ ComplexMatrix x (nc, b_nc); cs_complex_t *vec = reinterpret_cast (x.fortran_vec ()); + volatile octave_idx_type nbuf = (nc > S->m2 ? nc : S->m2); + OCTAVE_LOCAL_BUFFER (cs_complex_t, buf, nbuf); OCTAVE_LOCAL_BUFFER (double, B, nr); + for (octave_idx_type i = 0; i < nr; i++) B[i] = N->B[i]; + for (volatile octave_idx_type i = 0, idx = 0, bidx = 0; i < b_nc; i++, idx+=nc, bidx+=b_nr) { octave_quit (); + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = cs_complex_t (0.0, 0.0); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_pvec) (S->q, bvec + bidx, buf, nr); CXSPARSE_ZNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, j, B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_pvec) (S->pinv, buf, vec + idx, nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + info = 0; return x; @@ -1587,7 +1845,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) octave_idx_type nr = nrows; octave_idx_type nc = ncols; @@ -1595,31 +1853,38 @@ octave_idx_type b_nc = b.cols (); octave_idx_type b_nr = b.rows (); - volatile octave_idx_type ii, x_nz; - SparseComplexMatrix x (nc, b_nc, b.nnz ()); x.xcidx (0) = 0; - x_nz = b.nnz (); - ii = 0; + + volatile octave_idx_type x_nz = b.nnz (); + volatile octave_idx_type ii = 0; + OCTAVE_LOCAL_BUFFER (Complex, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (cs_complex_t, buf, S->m2); + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) Xx[j] = b.xelem (j,i); + for (octave_idx_type j = nr; j < S->m2; j++) buf[j] = cs_complex_t (0.0, 0.0); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_ipvec) (S->pinv, reinterpret_cast(Xx), buf, nr); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = 0; j < nc; j++) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, j, N->B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_usolve) (N->U, buf); CXSPARSE_ZNAME (_ipvec) (S->q, buf, reinterpret_cast(Xx), nc); @@ -1628,6 +1893,7 @@ for (octave_idx_type j = 0; j < nc; j++) { Complex tmp = Xx[j]; + if (tmp != 0.0) { if (ii == x_nz) @@ -1638,10 +1904,12 @@ x.change_capacity (sz); x_nz = sz; } + x.xdata (ii) = tmp; x.xridx (ii++) = j; } } + x.xcidx (i+1) = ii; } @@ -1666,7 +1934,7 @@ { info = -1; -#ifdef HAVE_CXSPARSE +#if defined (HAVE_CXSPARSE) // These are swapped because the original matrix was transposed in // sparse_qr::solve. @@ -1677,36 +1945,44 @@ octave_idx_type b_nc = b.cols (); octave_idx_type b_nr = b.rows (); - volatile octave_idx_type ii, x_nz; - SparseComplexMatrix x (nc, b_nc, b.nnz ()); x.xcidx (0) = 0; - x_nz = b.nnz (); - ii = 0; + + volatile octave_idx_type x_nz = b.nnz (); + volatile octave_idx_type ii = 0; volatile octave_idx_type nbuf = (nc > S->m2 ? nc : S->m2); + OCTAVE_LOCAL_BUFFER (Complex, Xx, (b_nr > nc ? b_nr : nc)); OCTAVE_LOCAL_BUFFER (cs_complex_t, buf, nbuf); OCTAVE_LOCAL_BUFFER (double, B, nr); + for (octave_idx_type i = 0; i < nr; i++) B[i] = N->B[i]; + for (volatile octave_idx_type i = 0, idx = 0; i < b_nc; i++, idx+=nc) { octave_quit (); + for (octave_idx_type j = 0; j < b_nr; j++) Xx[j] = b.xelem (j,i); + for (octave_idx_type j = nr; j < nbuf; j++) buf[j] = cs_complex_t (0.0, 0.0); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_pvec) (S->q, reinterpret_cast(Xx), buf, nr); CXSPARSE_ZNAME (_utsolve) (N->U, buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; + for (volatile octave_idx_type j = nr-1; j >= 0; j--) { octave_quit (); + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_happly) (N->L, j, B[j], buf); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; } + BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; CXSPARSE_ZNAME (_pvec) (S->pinv, buf, reinterpret_cast(Xx), nc); END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE; @@ -1714,6 +1990,7 @@ for (octave_idx_type j = 0; j < nc; j++) { Complex tmp = Xx[j]; + if (tmp != 0.0) { if (ii == x_nz) @@ -1724,10 +2001,12 @@ x.change_capacity (sz); x_nz = sz; } + x.xdata (ii) = tmp; x.xridx (ii++) = j; } } + x.xcidx (i+1) = ii; } diff -r 791dcb32b657 -r a10f60e13243 liboctave/numeric/sparse-qr.h --- a/liboctave/numeric/sparse-qr.h Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/numeric/sparse-qr.h Tue Feb 02 16:53:01 2016 -0500 @@ -1,7 +1,7 @@ /* +Copyright (C) 2016 John W. Eaton Copyright (C) 2005-2015 David Bateman -Copyright (C) 2016 John W. Eaton This file is part of Octave. @@ -30,6 +30,11 @@ #include "CSparse.h" #include "oct-sparse.h" +// If the sparse matrix classes become templated on the element type +// (i.e., sparse_matrix), then it might be best to make the +// template parameter of this class also be the element type instead +// of the matrix type. + template class sparse_qr diff -r 791dcb32b657 -r a10f60e13243 liboctave/util/oct-sparse.h --- a/liboctave/util/oct-sparse.h Tue Feb 02 12:34:06 2016 -0500 +++ b/liboctave/util/oct-sparse.h Tue Feb 02 16:53:01 2016 -0500 @@ -24,97 +24,97 @@ #define octave_oct_sparse_h 1 #if defined (HAVE_SUITESPARSE_AMD_H) -#include +# include #elif defined (HAVE_UFSPARSE_AMD_H) -#include +# include #elif defined (HAVE_AMD_AMD_H) -#include +# include #elif defined (HAVE_AMD_H) -#include +# include #endif #if defined (HAVE_SUITESPARSE_UMFPACK_H) -#include +# include #elif defined (HAVE_UFSPARSE_UMFPACK_H) -#include +# include #elif defined (HAVE_UMFPACK_UMFPACK_H) -#include +# include #elif defined (HAVE_UMFPACK_H) -#include +# include #endif #if defined (HAVE_SUITESPARSE_COLAMD_H) -#include +# include #elif defined (HAVE_UFSPARSE_COLAMD_H) -#include +# include #elif defined (HAVE_COLAMD_COLAMD_H) -#include +# include #elif defined (HAVE_COLAMD_H) -#include +# include #endif #if defined (HAVE_SUITESPARSE_CCOLAMD_H) -#include +# include #elif defined (HAVE_UFSPARSE_CCOLAMD_H) -#include +# include #elif defined (HAVE_CCOLAMD_CCOLAMD_H) -#include +# include #elif defined (HAVE_CCOLAMD_H) -#include +# include #endif #if defined (HAVE_SUITESPARSE_CHOLMOD_H) -#include +# include #elif defined (HAVE_UFSPARSE_CHOLMOD_H) -#include +# include #elif defined (HAVE_CHOLMOD_CHOLMOD_H) -#include +# include #elif defined (HAVE_CHOLMOD_H) -#include +# include #endif #if defined (HAVE_SUITESPARSE_CS_H) -#include +# include #elif defined (HAVE_UFSPARSE_CS_H) -#include +# include #elif defined (HAVE_CXSPARSE_CS_H) -#include +# include #elif defined (HAVE_CS_H) -#include +# include #endif #if (defined (HAVE_SUITESPARSE_CHOLMOD_H) \ || defined (HAVE_UFSPARSE_CHOLMOD_H) \ || defined (HAVE_CHOLMOD_CHOLMOD_H) \ || defined (HAVE_CHOLMOD_H)) -#if defined (ENABLE_64) -#define CHOLMOD_NAME(name) cholmod_l_ ## name -#else -#define CHOLMOD_NAME(name) cholmod_ ## name -#endif +# if defined (ENABLE_64) +# define CHOLMOD_NAME(name) cholmod_l_ ## name +# else +# define CHOLMOD_NAME(name) cholmod_ ## name +# endif #endif // Cope with new SuiteSparse versions #if defined (SUITESPARSE_VERSION) -# if SUITESPARSE_VERSION >= SUITESPARSE_VER_CODE (4, 3) -# define SUITESPARSE_NAME(name) SuiteSparse_ ## name -# define SUITESPARSE_ASSIGN_FPTR(f_name, f_var, f_assign) (SuiteSparse_config.f_name = f_assign) -# define SUITESPARSE_ASSIGN_FPTR2(f_name, f_var, f_assign) (SuiteSparse_config.f_name = SUITESPARSE_NAME (f_assign)) -# else -# define SUITESPARSE_ASSIGN_FPTR(f_name, f_var, f_assign) (f_var = f_assign) -# define SUITESPARSE_ASSIGN_FPTR2(f_name, f_var, f_assign) (f_var = CHOLMOD_NAME (f_assign)) -# endif +# if SUITESPARSE_VERSION >= SUITESPARSE_VER_CODE (4, 3) +# define SUITESPARSE_NAME(name) SuiteSparse_ ## name +# define SUITESPARSE_ASSIGN_FPTR(f_name, f_var, f_assign) (SuiteSparse_config.f_name = f_assign) +# define SUITESPARSE_ASSIGN_FPTR2(f_name, f_var, f_assign) (SuiteSparse_config.f_name = SUITESPARSE_NAME (f_assign)) +# else +# define SUITESPARSE_ASSIGN_FPTR(f_name, f_var, f_assign) (f_var = f_assign) +# define SUITESPARSE_ASSIGN_FPTR2(f_name, f_var, f_assign) (f_var = CHOLMOD_NAME (f_assign)) +# endif #endif #if defined (HAVE_CXSPARSE) -# if defined (ENABLE_64) -# define CXSPARSE_DNAME(name) cs_dl ## name -# define CXSPARSE_ZNAME(name) cs_cl ## name -# else -# define CXSPARSE_DNAME(name) cs_di ## name -# define CXSPARSE_ZNAME(name) cs_ci ## name -# endif +# if defined (ENABLE_64) +# define CXSPARSE_DNAME(name) cs_dl ## name +# define CXSPARSE_ZNAME(name) cs_cl ## name +# else +# define CXSPARSE_DNAME(name) cs_di ## name +# define CXSPARSE_ZNAME(name) cs_ci ## name +# endif #endif #endif