Mercurial > forge

--- a/main/sparse/Makefile	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/Makefile	Fri Oct 12 02:24:28 2001 +0000
@@ -2,8 +2,13 @@
 # $Id$
 #
 # $Log$
-# Revision 1.1  2001/10/10 19:54:49  pkienzle
-# Initial revision
+# Revision 1.2  2001/10/12 02:24:28  aadler
+# Mods to fix bugs
+# add support for all zero sparse matrices
+# add support fom complex sparse inverse
+#
+# Revision 1.1.1.1  2001/10/10 19:54:49  pkienzle
+# revised heirarchy
 #
 # Revision 1.7  2001/04/04 02:13:46  aadler
 # complete complex_sparse, templates, fix memory leaks
@@ -96,7 +101,7 @@
 NAME     =   make_sparse
 OBJLINKS =   spfind.oct sparse.oct full.oct splu.oct nnz.oct spinv.oct
 SUPERLU  =   SuperLU
-OCTVER   =   -2.1.32
+OCTVER   =  # -2.1.32
 OCTAVE   =   octave$(OCTVER)
 ifndef MKOCTFILE
 MKOCTFILE=   mkoctfile$(OCTVER)
--- a/main/sparse/README	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/README	Fri Oct 12 02:24:28 2001 +0000
@@ -14,14 +14,20 @@
   d sp_test.m
   e fem_test.m
   f superlu2.0patch.diff
+
+0a. Make sure that your version of octave is compiled
+    with:
+    ./configure --enable-dl --enable-shared

-0a. This is tested to work with octave-2.1.32
+0b. This is tested to work with octave-2.1.34
     It works with >2.1.30 if you apply the mkoctfile patch
     at www.octave.org/mailing-lists/octave-maintainers/2000/175

-0b. If you already have a SuperLU subdirectory with
+0c. If you already have a SuperLU subdirectory with
     SuperLU/SRC and SuperLU/CBLAS then ignore
     steps 1-3
+    (You will have SuperLU if you downloaded this from
+     sourceforge.net)

 1. Download SuperLU from one of the following sites
                          http://www.netlib.org/scalapack/prototype
@@ -39,6 +45,17 @@
           NOTE: do not run the SuperLU makefiles -
                it doesn't build the right objects into the library

+5. Installation:
+   Copy 1) make_sparse.oct and 2) the links to make_sparse.oct
+   into any directory in your LOADPATH
+         full.oct -> make_sparse.oct
+         nnz.oct -> make_sparse.oct
+         sparse.oct -> make_sparse.oct
+         spfind.oct -> make_sparse.oct
+         spinv.oct -> make_sparse.oct
+         splu.oct -> make_sparse.oct
+
+
 This makefile assumes that the SuperLU package has been unpacked
  in this directory. It compiles files directly from their locations
  in the SuperLU source. You do not need to use SuperLU makefiles.
@@ -49,7 +66,7 @@

 AUTHOR:  Andy Adler <en254@ncf.ca>

-COPYRIGHT:
+COPYRIGHT: GPL + opensource linking
 Copyright (C) 1998-00 Andy Adler

    This program is free software; you can redistribute it and/or
@@ -64,6 +81,10 @@
 License along with this program; if not, write to the Free Software
 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

+   In addition to the terms of the GPL, you are permitted to link
+this program with any Open Source program, as defined by the
+Open Source Initiative (www.opensource.org)
+

 SUPERLU COPYRIGHT:
 The following copyright is taken from any of the
@@ -130,13 +151,7 @@

 KNOWN AND PREDICTED BUGS AND FEATURES:

-1.  It probably leaks memory. I'll try to test this more carefully
-    after I stop adding features.
-
-2.  It does not support empty matrices properly.
-    It does not support completely sparse (all zero) matricies either.
-
-3.  The attempt here is to provide compatability to the main MATLAB
+1.  The attempt here is to provide compatability to the main MATLAB
     sparse matrix entry points. The SuperLU provides slightly different
     fuctionality to MATLAB, and, therefore this toolkit provides
     different features.
@@ -147,51 +162,54 @@
     Additionally, I don't intend to provide replacements for the
     MATLAB sparse functions I consider frills. eg sprand.m, treeplot.m

-4.  SuperLU uses the int data type as indices. Since ints are normally
+2.  SuperLU uses the int data type as indices. Since ints are normally
     4 bytes (at least on i386 machines), this is not a great problem.
     There may be still a few places where an int is used to refer
     to i+j*m, which means that sparse operations on matrices where
     m*n > maxint/2 may be buggy. I've tried to fix this, but there
     may be some left.  Email me if you find any.

-5.  Sparse solve is a little faster than the MATLAB equivalent, except
+3.  Sparse solve is a little faster than the MATLAB equivalent, except
     it seems not to choose the factoring as well. Using a perm_c spec of
     2 for symetric matrices will get speed improvements of ~ 1.5.
     (For the FEM problems I use it for)

 TODO:

-add complex test cases
+add complex test cases - done

-add tests for failures (matrix size mismatch)
+add tests for failures (matrix size mismatch) - done

-go though sparse_ops.h and support all zero matrices
+go though sparse_ops.h and support all zero matrices - done

-test code for all zero and empty matrices
+test code for all zero and empty matrices - all zero done
+
+complex sparse select operations

 more efficient code for sparse \ sparse
   - fix casting spagetti

-support sparse \ sparse for complex
+support sparse \ sparse for complex - done

 move code to c++ operators - so we can use sparse from liboctave

-fix error for sparse(1)\1
-   DEBUG:sparse - matrix_to_sparse
-   DEBUG:sparse( SuperMatrix A)
-   DEBUG:sparse - numeric_conversion_function
-   DEBUG:sparse - default_numeric_conversion_function
-   DEBUG:sparse - matrix_value
-   DEBUG:sparse - sparse_to_full
-   error: operator \: nonconformant arguments (op1 is 1x1, op2 is 1x1)
-
-   #include <octave/oct.h>
-   defun_dld (jnk, args, , "jnk" ) {
-     matrix x(1,1); x(0,0)= 1; octave_value o1(x);
-     octave_value o2(2.0);
-     octave_value_list retval; retval(0)= o1/(o2);
-     return retval;
-   }
+>fix error for sparse(1)\1
+>   DEBUG:sparse - matrix_to_sparse
+>   DEBUG:sparse( SuperMatrix A)
+>   DEBUG:sparse - numeric_conversion_function
+>   DEBUG:sparse - default_numeric_conversion_function
+>   DEBUG:sparse - matrix_value
+>   DEBUG:sparse - sparse_to_full
+>   error: operator \: nonconformant arguments (op1 is 1x1, op2 is 1x1)
+>
+>   #include <octave/oct.h>
+>   defun_dld (jnk, args, , "jnk" ) {
+>     matrix x(1,1); x(0,0)= 1; octave_value o1(x);
+>     octave_value o2(2.0);
+>     octave_value_list retval; retval(0)= o1/(o2);
+>     return retval;
+>   }
+--> fixed in octave-2.1.34

 complex sparse constructor will create real sparse if matrix
 is actually real
@@ -199,4 +217,6 @@
 make debug constructor which sanity checks matrices
  - option which checks if no nonzero elems isist

-complex sparse functions for splu - spinv
+complex sparse functions for splu - spinv  - done
+
+sp_inverse_uppertriang segfaults for spinv(sparse(ones(3))). It should warn
--- a/main/sparse/complex_sparse_ops.cc	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/complex_sparse_ops.cc	Fri Oct 12 02:24:28 2001 +0000
@@ -19,6 +19,11 @@
 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA.

+In addition to the terms of the GPL, you are permitted to link
+this program with any Open Source program, as defined by the
+Open Source Initiative (www.opensource.org)
+
+
 $Id$

 */
@@ -939,44 +944,81 @@
    return new octave_complex_sparse ( X );
 }

-#if 0
-
-// TODO: This isn't an efficient solution
-//  to take the inverse and multiply,
-//  on the other hand, I can rarely see this being
-//  a useful thing to do anyway
-DEFBINOP( f_s_ldiv, matrix, sparse) {
-   DEBUGMSG("complex_sparse - f_s_ldiv");
+DEFBINOP( f_cs_ldiv, matrix, complex_sparse) {
+   DEBUGMSG("complex_sparse - f_cs_ldiv");
    CAST_BINOP_ARGS ( const octave_matrix&, const octave_complex_sparse&);
-   const Matrix  A= v1.matrix_value();
-   SuperMatrix   B= v2.super_matrix();
-   return f_s_multiply(A.inverse() , B);
-} // f_s_ldiv
+   const Matrix A= v1.matrix_value().inverse(); int Anr= A.rows(); int Anc= A.cols();
+   SuperMatrix  B= v2.super_matrix(); DEFINE_SP_POINTERS_CPLX( B )
+   MATRIX_SPARSE_MUL( ComplexMatrix, Complex )
+   return X;
+} // f_cs_ldiv

-// sparse \ sparse solve
-//
-// Note: there are more efficient implemetations,
-//       but this works
-//
-// There is a wierd problem here,
-// it should be possible to multiply s=r*v2;
-// but that doesn't work
-//
-DEFBINOP( s_s_ldiv, sparse, sparse) {
-   DEBUGMSG("complex_sparse - s_s_ldiv");
-   CAST_BINOP_ARGS ( const octave_complex_sparse&, const octave_complex_sparse&);
-   SuperMatrix   A= v1.super_matrix();
-   octave_value  B= new octave_complex_sparse( v2.super_matrix() );
-   int n = A.ncol;
+DEFBINOP( cf_cs_ldiv, complex_matrix, complex_sparse) {
+   DEBUGMSG("complex_sparse - cf_cs_ldiv");
+   CAST_BINOP_ARGS ( const octave_complex_matrix&, const octave_complex_sparse&);
+   const ComplexMatrix A= v1.complex_matrix_value().inverse(); int Anr= A.rows(); int Anc= A.cols();
+   SuperMatrix  B= v2.super_matrix(); DEFINE_SP_POINTERS_CPLX( B )
+   MATRIX_SPARSE_MUL( ComplexMatrix, Complex )
+   return X;
+} // cf_cs_ldiv
+
+DEFBINOP( cf_s_ldiv, complex_matrix, sparse) {
+   DEBUGMSG("complex_sparse - cf_s_ldiv");
+   CAST_BINOP_ARGS ( const octave_complex_matrix&, const octave_sparse&);
+   const ComplexMatrix A= v1.complex_matrix_value().inverse(); int Anr= A.rows(); int Anc= A.cols();
+   SuperMatrix  B= v2.super_matrix(); DEFINE_SP_POINTERS_REAL( B )
+   MATRIX_SPARSE_MUL( ComplexMatrix, double )
+   return X;
+} // cf_s_ldiv
+
+
+DEFBINOP( s_cs_ldiv, sparse, complex_sparse) {
+   DEBUGMSG("sparse - s_cs_ldiv");
+   CAST_BINOP_ARGS ( const octave_sparse&, const octave_complex_sparse&);
+   int n = v1.columns();
    int perm_c[n];
    int permc_spec=3;
-   octave_value_list Ai= oct_sparse_inverse( A, perm_c, permc_spec );
-   octave_value retval= Ai(0)*Ai(1)*Ai(2)*Ai(3) * B;
-
-   return retval;
-} // f_s_ldiv
+   octave_value_list Si= oct_sparse_inverse( v1, perm_c, permc_spec );
+   octave_value inv= Si(0)*Si(1)*Si(2)*Si(3);
+   const octave_value& rep = inv.get_rep ();
+   SuperMatrix A = ((const octave_sparse&) rep) . super_matrix ();
+   DEFINE_SP_POINTERS_REAL( A )
+   SuperMatrix B = v2.super_matrix(); DEFINE_SP_POINTERS_CPLX( B )
+   SPARSE_SPARSE_MUL( Complex )
+   return new octave_complex_sparse ( X );
+} // s_cs_ldiv

-#endif
+DEFBINOP( cs_s_ldiv, complex_sparse, sparse) {
+   DEBUGMSG("sparse - cs_s_ldiv");
+   CAST_BINOP_ARGS ( const octave_complex_sparse&, const octave_sparse&);
+   SuperMatrix   B= v2.super_matrix(); DEFINE_SP_POINTERS_REAL( B )
+   int n = v1.columns();
+   int perm_c[n];
+   int permc_spec=3;
+   octave_value_list Si= oct_sparse_inverse( v1, perm_c, permc_spec );
+   octave_value inv= Si(0)*Si(1)*Si(2)*Si(3);
+   const octave_value& rep = inv.get_rep ();
+   SuperMatrix A = ((const octave_complex_sparse&) rep) . super_matrix ();
+   DEFINE_SP_POINTERS_CPLX( A )
+   SPARSE_SPARSE_MUL( Complex )
+   return new octave_complex_sparse ( X );
+} // s_s_ldiv
+
+DEFBINOP( cs_cs_ldiv, complex_sparse, complex_sparse) {
+   DEBUGMSG("sparse - cs_cs_ldiv");
+   CAST_BINOP_ARGS ( const octave_complex_sparse&, const octave_complex_sparse&);
+   SuperMatrix   B= v2.super_matrix(); DEFINE_SP_POINTERS_CPLX( B )
+   int n = v1.columns();
+   int perm_c[n];
+   int permc_spec=3;
+   octave_value_list Si= oct_sparse_inverse( v1, perm_c, permc_spec );
+   octave_value inv= Si(0)*Si(1)*Si(2)*Si(3);
+   const octave_value& rep = inv.get_rep ();
+   SuperMatrix A = ((const octave_complex_sparse&) rep) . super_matrix ();
+   DEFINE_SP_POINTERS_CPLX( A )
+   SPARSE_SPARSE_MUL( Complex )
+   return new octave_complex_sparse ( X );
+} // cs_cs_ldiv

 //
 // Sparse \ Full solve
@@ -1087,10 +1129,15 @@
    //
    INSTALL_BINOP (op_ldiv,     octave_complex_sparse, octave_complex_matrix, cs_cf_ldiv);
    INSTALL_BINOP (op_ldiv,     octave_complex_sparse, octave_matrix,         cs_f_ldiv);
-#if 0
-   INSTALL_BINOP (op_ldiv,     octave_matrix, octave_complex_sparse, f_s_ldiv);
-   INSTALL_BINOP (op_ldiv,     octave_complex_sparse, octave_complex_sparse, s_s_ldiv);
-#endif
+
+   INSTALL_BINOP (op_ldiv,     octave_matrix,         octave_complex_sparse, f_cs_ldiv);
+   INSTALL_BINOP (op_ldiv,     octave_complex_matrix, octave_complex_sparse, cf_cs_ldiv);
+   INSTALL_BINOP (op_ldiv,     octave_complex_matrix, octave_sparse        , cf_s_ldiv);
+
+   INSTALL_BINOP (op_ldiv,     octave_complex_sparse, octave_complex_sparse, cs_cs_ldiv);
+   INSTALL_BINOP (op_ldiv,     octave_sparse,         octave_complex_sparse, s_cs_ldiv);
+   INSTALL_BINOP (op_ldiv,     octave_complex_sparse, octave_sparse,         cs_s_ldiv);
+
    INSTALL_BINOP (op_ne,       octave_complex_sparse, octave_complex_sparse, cs_cs_ne);
    INSTALL_BINOP (op_ne,       octave_complex_sparse, octave_sparse,         cs_s_ne);
    INSTALL_BINOP (op_ne,       octave_sparse,         octave_complex_sparse, s_cs_ne);
@@ -1128,10 +1175,175 @@
    INSTALL_BINOP (op_mul,      octave_sparse,         octave_complex_sparse, s_cs_mul);
 }

+// functions for splu and inverse
+
+//
+// This routine converts from the SuperNodal Matrices
+// L,U to the Comp Col format.
+//
+// It is modified from SuperLU/MATLAB/mexsuperlu.c
+//
+// It seems to produce badly formatted U. ie the
+//   row indeces are unsorted.
+// Need to call function to fix this.
+//
+
+void
+LUextract(SuperMatrix *L, SuperMatrix *U, Complex *Lval, int *Lrow,
+          int *Lcol, Complex *Uval, int *Urow, int *Ucol, int *snnzL,
+          int *snnzU)
+{
+   DEBUGMSG("LUextract-complex");
+   int         i, j, k;
+   int         upper;
+   int         fsupc, istart, nsupr;
+   int         lastl = 0, lastu = 0;
+   Complex     *SNptr;
+
+   SCformat * Lstore = (SCformat *) L->Store;
+   NCformat * Ustore = (NCformat *) U->Store;
+   Lcol[0] = 0;
+   Ucol[0] = 0;
+
+   /* for each supernode */
+   for (k = 0; k <= Lstore->nsuper; ++k) {
+
+       fsupc = L_FST_SUPC(k);
+       istart = L_SUB_START(fsupc);
+       nsupr = L_SUB_START(fsupc+1) - istart;
+       upper = 1;
+
+       /* for each column in the supernode */
+       for (j = fsupc; j < L_FST_SUPC(k+1); ++j) {
+           SNptr = &((Complex*)Lstore->nzval)[L_NZ_START(j)];
+
+           /* Extract U */
+           for (i = U_NZ_START(j); i < U_NZ_START(j+1); ++i) {
+               Uval[lastu] = ((Complex*)Ustore->nzval)[i];
+               if (Uval[lastu] != 0.0) Urow[lastu++] = U_SUB(i);
+           }
+           /* upper triangle in the supernode */
+           for (i = 0; i < upper; ++i) {
+               Uval[lastu] = SNptr[i];
+               if (Uval[lastu] != 0.0) Urow[lastu++] = L_SUB(istart+i);
+           }
+           Ucol[j+1] = lastu;
+
+           /* Extract L */
+           Lval[lastl] = 1.0; /* unit diagonal */
+           Lrow[lastl++] = L_SUB(istart + upper - 1);
+           for (i = upper; i < nsupr; ++i) {
+               Lval[lastl] = SNptr[i];
+                /* Matlab doesn't like explicit zero. */
+               if (Lval[lastl] != 0.0) Lrow[lastl++] = L_SUB(istart+i);
+           }
+           Lcol[j+1] = lastl;
+
+           ++upper;
+
+       } /* for j ... */
+
+   } /* for k ... */
+
+   *snnzL = lastl;
+   *snnzU = lastu;
+}
+
+void
+complex_sparse_LU_fact(SuperMatrix A,
+                       SuperMatrix *LC,
+                       SuperMatrix *UC,
+                       int * perm_c,
+                       int * perm_r,
+                       int permc_spec )
+{
+   DEBUGMSG("complex_sparse_LU_fact");
+   int m = A.nrow;
+   int n = A.ncol;
+   char   refact[1] = {'N'};
+   double thresh    = 1.0;     // diagonal pivoting threshold
+   double drop_tol  = 0.0;     // drop tolerance parameter
+   int    info;
+   int    panel_size = sp_ienv(1);
+   int    relax      = sp_ienv(2);
+   int    etree[n];
+   SuperMatrix Ac;
+   SuperMatrix L,U;
+
+   StatInit(panel_size, relax);
+
+   oct_sparse_do_permc( permc_spec, perm_c, A);
+   // Apply column perm to A and compute etree.
+   sp_preorder(refact, &A, perm_c, etree, &Ac);
+
+   zgstrf(refact, &Ac, thresh, drop_tol, relax, panel_size, etree,
+           NULL, 0, perm_r, perm_c, &L, &U, &info);
+   if ( info < 0 )
+      SP_FATAL_ERR ("LU factorization error");
+
+   int      snnzL, snnzU;
+
+   int       nnzL = ((SCformat*)L.Store)->nnz;
+   Complex * Lval = (Complex *) malloc( nnzL * sizeof(Complex) );
+   int     * Lrow = (    int *) malloc( nnzL * sizeof(    int) );
+   int     * Lcol = (    int *) malloc( (n+1)* sizeof(    int) );
+
+   int       nnzU = ((NCformat*)U.Store)->nnz;
+   Complex * Uval = (Complex *) malloc( nnzU * sizeof(Complex) );
+   int     * Urow = (    int *) malloc( nnzU * sizeof(    int) );
+   int     * Ucol = (    int *) malloc( (n+1)* sizeof(    int) );
+
+   LUextract(&L, &U, Lval, Lrow, Lcol, Uval, Urow, Ucol, &snnzL, &snnzU);
+   // we need to use the snnz values (squeezed vs. unsqueezed)
+   zCreate_CompCol_Matrix(LC, m, n, snnzL, (doublecomplex*) Lval, Lrow, Lcol, NC, _Z, GE);
+   zCreate_CompCol_Matrix(UC, m, n, snnzU, (doublecomplex*) Uval, Urow, Ucol, NC, _Z, GE);
+
+   fix_row_order_complex( *LC );
+   fix_row_order_complex( *UC );
+
+   oct_sparse_Destroy_SuperMatrix( L ) ;
+   oct_sparse_Destroy_SuperMatrix( U ) ;
+   oct_sparse_Destroy_SuperMatrix( Ac ) ;
+   StatFree();
+
+#if 0
+   printf("verify A\n");  oct_sparse_verify_supermatrix( A );
+   printf("verify LC\n"); oct_sparse_verify_supermatrix( *LC );
+   printf("verify UC\n"); oct_sparse_verify_supermatrix( *UC );
+#endif
+
+} // complex_sparse_LU_fact(
+
+FIX_ROW_ORDER_SORT_FUNCTIONS( Complex )
+
+void
+fix_row_order_complex( SuperMatrix X )
+{
+   DEBUGMSG("fix_row_order_complex");
+   DEFINE_SP_POINTERS_CPLX( X )
+   FIX_ROW_ORDER_FUNCTIONS
+}
+
+SuperMatrix
+complex_sparse_inv_uppertriang( SuperMatrix U)
+{
+   DEBUGMSG("sparse_inv_uppertriang");
+   DEFINE_SP_POINTERS_CPLX( U )
+   int    nnzU= NCFU->nnz;
+   SPARSE_INV_UPPERTRIANG( Complex )
+   return create_SuperMatrix( Unr,Unc,cx, coefX, ridxX, cidxX );
+}
+
 /*
  * $Log$
- * Revision 1.1  2001/10/10 19:54:49  pkienzle
- * Initial revision
+ * Revision 1.2  2001/10/12 02:24:28  aadler
+ * Mods to fix bugs
+ * add support for all zero sparse matrices
+ * add support fom complex sparse inverse
+ *
+ * Revision 1.7  2001/09/23 17:46:12  aadler
+ * updated README
+ * modified licence to GPL plus link to opensource programmes
  *
  * Revision 1.6  2001/04/04 02:13:46  aadler
  * complete complex_sparse, templates, fix memory leaks
--- a/main/sparse/make_sparse.cc	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/make_sparse.cc	Fri Oct 12 02:24:28 2001 +0000
@@ -17,6 +17,10 @@
 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA.

+In addition to the terms of the GPL, you are permitted to link
+this program with any Open Source program, as defined by the
+Open Source Initiative (www.opensource.org)
+
 $Id$

 */
@@ -252,8 +256,14 @@
 DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_complex_sparse, "complex_sparse");
 /*
  * $Log$
- * Revision 1.1  2001/10/10 19:54:49  pkienzle
- * Initial revision
+ * Revision 1.2  2001/10/12 02:24:28  aadler
+ * Mods to fix bugs
+ * add support for all zero sparse matrices
+ * add support fom complex sparse inverse
+ *
+ * Revision 1.7  2001/09/23 17:46:12  aadler
+ * updated README
+ * modified licence to GPL plus link to opensource programmes
  *
  * Revision 1.6  2001/04/04 02:13:46  aadler
  * complete complex_sparse, templates, fix memory leaks
--- a/main/sparse/make_sparse.h	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/make_sparse.h	Fri Oct 12 02:24:28 2001 +0000
@@ -19,8 +19,10 @@
 $Id$

 $Log$
-Revision 1.1  2001/10/10 19:54:49  pkienzle
-Initial revision
+Revision 1.2  2001/10/12 02:24:28  aadler
+Mods to fix bugs
+add support for all zero sparse matrices
+add support fom complex sparse inverse

 Revision 1.9  2001/04/04 02:13:46  aadler
 complete complex_sparse, templates, fix memory leaks
@@ -256,6 +258,8 @@

 SuperMatrix
 oct_sparse_transpose ( SuperMatrix X ) ;
+SuperMatrix
+oct_complex_sparse_transpose ( SuperMatrix X ) ;

 SuperMatrix
 oct_matrix_to_sparse(const Matrix & A) ;
@@ -289,7 +293,16 @@
                              ColumnVector& cidxA ) ;

 octave_value_list
-oct_sparse_inverse( SuperMatrix A,
+oct_sparse_inverse( const octave_sparse& A,
+                    int* perm_c,
+                    int permc_spec) ;
+octave_value_list
+oct_sparse_inverse( const octave_complex_sparse& Asp,
+                    int* perm_c,
+                    int permc_spec );
+
+octave_value_list
+oct_sparse_inverse( const octave_complex_sparse& A,
                     int* perm_c,
                     int permc_spec) ;

@@ -347,9 +360,33 @@
                     int * ridx,
                     int * cidx );

+void
+LUextract(SuperMatrix *L, SuperMatrix *U, double *Lval, int *Lrow,
+          int *Lcol, double *Uval, int *Urow, int *Ucol, int *snnzL,
+          int *snnzU);
+void
+LUextract(SuperMatrix *L, SuperMatrix *U, Complex *Lval, int *Lrow,
+          int *Lcol, Complex *Uval, int *Urow, int *Ucol, int *snnzL,
+          int *snnzU);
+void
+sparse_LU_fact(SuperMatrix A, SuperMatrix *LC, SuperMatrix *UC,
+               int * perm_c, int * perm_r, int permc_spec );
+void
+complex_sparse_LU_fact(SuperMatrix A, SuperMatrix *LC, SuperMatrix *UC,
+                       int * perm_c, int * perm_r, int permc_spec );
+void
+fix_row_order( SuperMatrix X );
+void
+fix_row_order_complex( SuperMatrix X );
+
 int
 complex_sparse_verify_doublecomplex_type(void);

+SuperMatrix
+sparse_inv_uppertriang( SuperMatrix U);
+SuperMatrix
+complex_sparse_inv_uppertriang( SuperMatrix U);
+

 // comparison function for sort in make_sparse
 typedef struct { unsigned long val;
@@ -389,3 +426,12 @@
 #include <dmalloc.h>
 #endif

+// Build the permutation matrix
+//  remember to add 1 because assemble_sparse is 1 based
+#define BUILD_PERM_VECTORS( ridx, cidx, coef, perm, n ) \
+      ColumnVector ridx(n), cidx(n), coef(n); \
+      for (int i=0; i<n; i++) { \
+         ridx(i)= 1.0 + i; \
+         cidx(i)= 1.0 + perm[i];  \
+         coef(i)= 1.0; \
+      }
--- a/main/sparse/sp_test.m	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/sp_test.m	Fri Oct 12 02:24:28 2001 +0000
@@ -22,16 +22,16 @@
 prefer_zero_one_indexing= 0;
 page_screen_output=1;
 SZ=10;
-NTRIES=1.0;
+NTRIES=100;

 res=zeros(1,200); # should be enough space

 for tries = 1:NTRIES;

-# print some relevant info from ps
-if 1
+% print some relevant info from ps
+if 0
    printf("t=%03d: %s", tries, system(["ps -uh ", num2str(getpid)],1 ) );
-endif
+end

 % choose some random sizes for the test matrices
    sz=floor(abs(randn(1,5))*SZ + 1);
@@ -41,15 +41,15 @@

 % choose random test matrices
    arf=zeros( sz1,sz2 );
-   nz= sz12*rand(1)/2+1;
+   nz= sz12*rand(1)/2+0;
    arf(ceil(rand(nz,1)*sz12))=randn(nz,1);

    brf=zeros( sz1,sz2 );
-   nz= sz12*rand(1)/2+1;
+   nz= sz12*rand(1)/2+0;
    brf(ceil(rand(nz,1)*sz12))=randn(nz,1);

    crf=zeros( sz2,sz3 );
-   nz= sz23*rand(1)/2+1;
+   nz= sz23*rand(1)/2+0;
    crf(ceil(rand(nz,1)*sz23))=randn(nz,1);

    while (1)
@@ -105,7 +105,7 @@
    sely= ceil( sz1*rand(1,2*ceil( rand(1)*sz1 )) );
    sel1= ceil(sz12*rand(1,2*ceil( rand(1)*sz12 )))';

-
+% save save_fil arf brf crf erf acf bcf ccf dcf ecf

    ars= sparse(arf);
    brs= sparse(brf);
@@ -120,169 +120,169 @@
    dcs= sparse(dcf);
    ecs= sparse(ecf);

-   i=1   ;     % i=  1
+   i=1   ;     % i= 1

 %
 % test sparse assembly and disassembly
 %
    res(i)= res(i)     +all(all( ars == ars ));
-   i=i+1 ;     % i=  2
+   i=i+1;      % i= 2
    res(i)= res(i)     +all(all( ars == arf ));
-   i=i+1 ;     % i=  3
+   i=i+1;      % i= 3
    res(i)= res(i)     +all(all( arf == ars ));
-   i=i+1 ;     % i=  4
+   i=i+1;      % i= 4
    res(i)= res(i)     +all(all( acs == acs ));
-   i=i+1 ;     % i=  5
+   i=i+1;      % i= 5
    res(i)= res(i)     +all(all( acs == acf ));
-   i=i+1 ;     % i=  6
+   i=i+1;      % i= 6
    res(i)= res(i)     +all(all( acf == acs ));
-   i=i+1 ;     % i=  7
+   i=i+1;      % i= 7

    [ii,jj,vv,nr,nc] = spfind( ars );
    res(i)= res(i)     +all(all( arf == full( sparse(ii,jj,vv,nr,nc) ) ));
-   i=i+1 ;     % i=  8
+   i=i+1;      % i= 8
    [ii,jj,vv,nr,nc] = spfind( acs );
    res(i)= res(i)     +all(all( acf == full( sparse(ii,jj,vv,nr,nc) ) ));
-   i=i+1 ;     % i=  9
+   i=i+1;      % i= 9
    res(i)= res(i)     +( nnz(ars) == sum(sum( arf!=0 )) );
-   i=i+1 ;     % i= 10
+   i=i+1;      % i=10
    res(i)= res(i)     +   (  nnz(ars) == nnz(arf));
-   i=i+1 ;     % i= 11
+   i=i+1;      % i=11
    res(i)= res(i)     +( nnz(acs) == sum(sum( acf!=0 )) );
-   i=i+1 ;     % i= 12
+   i=i+1;      % i=12
    res(i)= res(i)     +   (  nnz(acs) == nnz(acf));
-   i=i+1 ;     % i= 13
+   i=i+1;      % i=13
 %
 % test sparse op scalar operations
 %
    res(i)= res(i)     +all(all( (ars==frn) == (arf==frn) ));
-   i=i+1 ;     % i= 14
+   i=i+1;      % i=14
    res(i)= res(i)     +all(all( (frn==ars) == (frn==arf) ));
-   i=i+1 ;     % i= 15
+   i=i+1;      % i=15
    res(i)= res(i)     +all(all( (frn+ars) == (frn+arf) ));
-   i=i+1 ;     % i= 16
+   i=i+1;      % i=16
    res(i)= res(i)     +all(all( (ars+frn) == (arf+frn) ));
-   i=i+1 ;     % i= 17
+   i=i+1;      % i=17
    res(i)= res(i)     +all(all( (frn-ars) == (frn-arf) ));
-   i=i+1 ;     % i= 18
+   i=i+1;      % i=18
    res(i)= res(i)     +all(all( (ars-frn) == (arf-frn) ));
-   i=i+1 ;     % i= 19
+   i=i+1;      % i=19
    res(i)= res(i)     +all(all( (frn*ars) == (frn*arf) ));
-   i=i+1 ;     % i= 20
+   i=i+1;      % i=20
    res(i)= res(i)     +all(all( (ars*frn) == (arf*frn) ));
-   i=i+1 ;     % i= 21
+   i=i+1;      % i=21
    res(i)= res(i)     +all(all( (frn.*ars) == (frn.*arf) ));
-   i=i+1 ;     % i= 22
+   i=i+1;      % i=22
    res(i)= res(i)     +all(all( (ars.*frn) == (arf.*frn) ));
-   i=i+1 ;     % i= 23
+   i=i+1;      % i=23
    res(i)= res(i)     +all(all( abs( (frn\ars) - (frn\arf) )<1e-13 ));
-   i=i+1 ;     % i= 24
+   i=i+1;      % i=24
    res(i)= res(i)     +all(all( abs( (ars/frn) - (arf/frn) )<1e-13 ));
-   i=i+1 ;     % i= 25
+   i=i+1;      % i=25
 %
 % test sparse op complex scalar operations
 %
    res(i)= res(i)     +all(all( (ars==fcn) == (arf==fcn) ));
-   i=i+1 ;     % i= 26
+   i=i+1;      % i=26
    res(i)= res(i)     +all(all( (fcn==ars) == (fcn==arf) ));
-   i=i+1 ;     % i= 27
+   i=i+1;      % i=27
    res(i)= res(i)     +all(all( (fcn+ars) == (fcn+arf) ));
-   i=i+1 ;     % i= 28
+   i=i+1;      % i=28
    res(i)= res(i)     +all(all( (ars+fcn) == (arf+fcn) ));
-   i=i+1 ;     % i= 29
+   i=i+1;      % i=29
    res(i)= res(i)     +all(all( (fcn-ars) == (fcn-arf) ));
-   i=i+1 ;     % i= 30
+   i=i+1;      % i=30
    res(i)= res(i)     +all(all( (ars-fcn) == (arf-fcn) ));
-   i=i+1 ;     % i= 31
+   i=i+1;      % i=31
    res(i)= res(i)     +all(all( (fcn*ars) == (fcn*arf) ));
-   i=i+1 ;     % i= 32
+   i=i+1;      % i=32
    res(i)= res(i)     +all(all( (ars*fcn) == (arf*fcn) ));
-   i=i+1 ;     % i= 33
+   i=i+1;      % i=33
    res(i)= res(i)     +all(all( (fcn.*ars) == (fcn.*arf) ));
-   i=i+1 ;     % i= 34
+   i=i+1;      % i=34
    res(i)= res(i)     +all(all( (ars.*fcn) == (arf.*fcn) ));
-   i=i+1 ;     % i= 35
+   i=i+1;      % i=35
    res(i)= res(i)     +all(all( abs( (fcn\ars) - (fcn\arf) )<1e-13 ));
-   i=i+1 ;     % i= 36
+   i=i+1;      % i=36
    res(i)= res(i)     +all(all( abs( (ars/fcn) - (arf/fcn) )<1e-13 ));
-   i=i+1 ;     % i= 37
+   i=i+1;      % i=37
 %
 % test complex sparse op scalar operations
 %
    res(i)= res(i)     +all(all( (acs==frn) == (acf==frn) ));
-   i=i+1 ;     % i= 38
+   i=i+1;      % i=38
    res(i)= res(i)     +all(all( (frn==acs) == (frn==acf) ));
-   i=i+1 ;     % i= 39
+   i=i+1;      % i=39
    res(i)= res(i)     +all(all( (frn+acs) == (frn+acf) ));
-   i=i+1 ;     % i= 40
+   i=i+1;      % i=40
    res(i)= res(i)     +all(all( (acs+frn) == (acf+frn) ));
-   i=i+1 ;     % i= 41
+   i=i+1;      % i=41
    res(i)= res(i)     +all(all( (frn-acs) == (frn-acf) ));
-   i=i+1 ;     % i= 42
+   i=i+1;      % i=42
    res(i)= res(i)     +all(all( (acs-frn) == (acf-frn) ));
-   i=i+1 ;     % i= 43
+   i=i+1;      % i=43
    res(i)= res(i)     +all(all( (frn*acs) == (frn*acf) ));
-   i=i+1 ;     % i= 44
+   i=i+1;      % i=44
    res(i)= res(i)     +all(all( (acs*frn) == (acf*frn) ));
-   i=i+1 ;     % i= 45
+   i=i+1;      % i=45
    res(i)= res(i)     +all(all( (frn.*acs) == (frn.*acf) ));
-   i=i+1 ;     % i= 46
+   i=i+1;      % i=46
    res(i)= res(i)     +all(all( (acs.*frn) == (acf.*frn) ));
-   i=i+1 ;     % i= 47
+   i=i+1;      % i=47
    res(i)= res(i)     +all(all( abs( (frn\acs) - (frn\acf) )<1e-13 ));
-   i=i+1 ;     % i= 48
+   i=i+1;      % i=48
    res(i)= res(i)     +all(all( abs( (acs/frn) - (acf/frn) )<1e-13 ));
-   i=i+1 ;     % i= 49
+   i=i+1;      % i=49
 %
 % test complex sparse op complex scalar operations
 %
    res(i)= res(i)     +all(all( (acs==fcn) == (acf==fcn) ));
-   i=i+1 ;     % i= 50
+   i=i+1;      % i=50
    res(i)= res(i)     +all(all( (fcn==acs) == (fcn==acf) ));
-   i=i+1 ;     % i= 51
+   i=i+1;      % i=51
    res(i)= res(i)     +all(all( (fcn+acs) == (fcn+acf) ));
-   i=i+1 ;     % i= 52
+   i=i+1;      % i=52
    res(i)= res(i)     +all(all( (acs+fcn) == (acf+fcn) ));
-   i=i+1 ;     % i= 53
+   i=i+1;      % i=53
    res(i)= res(i)     +all(all( (fcn-acs) == (fcn-acf) ));
-   i=i+1 ;     % i= 54
+   i=i+1;      % i=54
    res(i)= res(i)     +all(all( (acs-fcn) == (acf-fcn) ));
-   i=i+1 ;     % i= 55
+   i=i+1;      % i=55
    res(i)= res(i)     +all(all( (fcn*acs) == (fcn*acf) ));
-   i=i+1 ;     % i= 56
+   i=i+1;      % i=56
    res(i)= res(i)     +all(all( (acs*fcn) == (acf*fcn) ));
-   i=i+1 ;     % i= 57
+   i=i+1;      % i=57
    res(i)= res(i)     +all(all( (fcn.*acs) == (fcn.*acf) ));
-   i=i+1 ;     % i= 58
+   i=i+1;      % i=58
    res(i)= res(i)     +all(all( (acs.*fcn) == (acf.*fcn) ));
-   i=i+1 ;     % i= 59
+   i=i+1;      % i=59
    res(i)= res(i)     +all(all( abs( (fcn\acs) - (fcn\acf) )<1e-13 ));
-   i=i+1 ;     % i= 60
+   i=i+1;      % i=60
    res(i)= res(i)     +all(all( abs( (acs/fcn) - (acf/fcn) )<1e-13 ));
-   i=i+1 ;     % i= 61
+   i=i+1;      % i=61

 %
 % sparse uary ops
 %
    res(i)= res(i)     +all(all( ars.' ==  arf.' ));
-   i=i+1 ;     % i= 62
+   i=i+1;      % i=62
    res(i)= res(i)     +all(all( ars'  ==  arf' ));
-   i=i+1 ;     % i= 63
+   i=i+1;      % i=63
    res(i)= res(i)     +all(all( -ars  == -arf ));
-   i=i+1 ;     % i= 64
+   i=i+1;      % i=64
    res(i)= res(i)     +all(all( ~ars  == ~arf ));
-   i=i+1 ;     % i= 65
+   i=i+1;      % i=65
 %
 % complex sparse uary ops
 %
    res(i)= res(i)     +all(all( acs.' ==  acf.' ));
-   i=i+1 ;     % i= 66
+   i=i+1;      % i=66
    res(i)= res(i)     +all(all( acs'  ==  acf' ));
-   i=i+1 ;     % i= 67
+   i=i+1;      % i=67
    res(i)= res(i)     +all(all( -acs  == -acf ));
-   i=i+1 ;     % i= 68
+   i=i+1;      % i=68
    res(i)= res(i)     +all(all( ~acs  == ~acf ));
-   i=i+1 ;     % i= 69
+   i=i+1;      % i=69

 %
 % sparse op sparse and  sparse op matrix
@@ -293,52 +293,52 @@
    # FIXME: this breaks if drs is 1x1
    rdif= abs(drs\erf - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i= 70
+   i=i+1;      % i=70

    rdif= abs(drf\ers - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i= 71
+   i=i+1;      % i=71

    rdif= abs(drs\ers - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i= 72
+   i=i+1;      % i=72

    res(i)= res(i)     +all(all( ars+brs == arf+brf ));
-   i=i+1 ;     % i= 73
+   i=i+1;      % i=73
    res(i)= res(i)     +all(all( arf+brs == arf+brf ));
-   i=i+1 ;     % i= 74
+   i=i+1;      % i=74
    res(i)= res(i)     +all(all( ars+brf == arf+brf ));
-   i=i+1 ;     % i= 75
+   i=i+1;      % i=75
    res(i)= res(i)     +all(all( ars-brs == arf-brf ));
-   i=i+1 ;     % i= 76
+   i=i+1;      % i=76
    res(i)= res(i)     +all(all( arf-brs == arf-brf ));
-   i=i+1 ;     % i= 77
+   i=i+1;      % i=77
    res(i)= res(i)     +all(all( ars-brf == arf-brf ));
-   i=i+1 ;     % i= 78
+   i=i+1;      % i=78
    res(i)= res(i)     +all(all( (ars>brs) == (arf>brf) ));
-   i=i+1 ;     % i= 79
+   i=i+1;      % i=79
    res(i)= res(i)     +all(all( (ars<brs) == (arf<brf) ));
-   i=i+1 ;     % i= 80
+   i=i+1;      % i=80
    res(i)= res(i)     +all(all( (ars!=brs) == (arf!=brf) ));
-   i=i+1 ;     % i= 81
+   i=i+1;      % i=81
    res(i)= res(i)     +all(all( (ars>=brs) == (arf>=brf) ));
-   i=i+1 ;     % i= 82
+   i=i+1;      % i=82
    res(i)= res(i)     +all(all( (ars<=brs) == (arf<=brf) ));
-   i=i+1 ;     % i= 83
+   i=i+1;      % i=83
    res(i)= res(i)     +all(all( (ars==brs) == (arf==brf) ));
-   i=i+1 ;     % i= 84
+   i=i+1;      % i=84
    res(i)= res(i)     +all(all( ars.*brs == arf.*brf ));
-   i=i+1 ;     % i= 85
+   i=i+1;      % i=85
    res(i)= res(i)     +all(all( arf.*brs == arf.*brf ));
-   i=i+1 ;     % i= 86
+   i=i+1;      % i=86
    res(i)= res(i)     +all(all( ars.*brf == arf.*brf ));
-   i=i+1 ;     % i= 87
+   i=i+1;      % i=87
    res(i)= res(i)     +all(all( ars*crs == arf*crf ));
-   i=i+1 ;     % i= 88
+   i=i+1;      % i=88
    res(i)= res(i)     +all(all( arf*crs == arf*crf ));
-   i=i+1 ;     % i= 89
+   i=i+1;      % i=89
    res(i)= res(i)     +all(all( ars*crf == arf*crf ));
-   i=i+1 ;     % i= 90
+   i=i+1;      % i=90

 %
 % sparse op complex sparse and  sparse op complex matrix
@@ -349,53 +349,53 @@
    # FIXME: this breaks if drs is 1x1
    rdif= abs(drs\ecf - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i= 91
+   i=i+1;      % i=91

    rdif= abs(drf\ecs - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i= 92
+   i=i+1;      % i=92

 # TODO: not avail yet
 #  rdif= abs(drs\ecs - df_ef) < abs(mag*df_ef);
 #  res(i)= res(i)     +all(all( rdif ));
-#  i=i+1 ;     % i= 93
+#  i=i+1;      % i=93

    res(i)= res(i)     +all(all( ars+bcs == arf+bcf ));
-   i=i+1 ;     % i= 94
+   i=i+1;      % i=94
    res(i)= res(i)     +all(all( arf+bcs == arf+bcf ));
-   i=i+1 ;     % i= 95
+   i=i+1;      % i=95
    res(i)= res(i)     +all(all( ars+bcf == arf+bcf ));
-   i=i+1 ;     % i= 96
+   i=i+1;      % i=96
    res(i)= res(i)     +all(all( ars-bcs == arf-bcf ));
-   i=i+1 ;     % i= 97
+   i=i+1;      % i=97
    res(i)= res(i)     +all(all( arf-bcs == arf-bcf ));
-   i=i+1 ;     % i= 98
+   i=i+1;      % i=98
    res(i)= res(i)     +all(all( ars-bcf == arf-bcf ));
-   i=i+1 ;     % i= 99
+   i=i+1;      % i=99
    res(i)= res(i)     +all(all( (ars>bcs) == (arf>bcf) ));
-   i=i+1 ;     % i=100
+   i=i+1;      % i=100
    res(i)= res(i)     +all(all( (ars<bcs) == (arf<bcf) ));
-   i=i+1 ;     % i=101
+   i=i+1;      % i=101
    res(i)= res(i)     +all(all( (ars!=bcs) == (arf!=bcf) ));
-   i=i+1 ;     % i=102
+   i=i+1;      % i=102
    res(i)= res(i)     +all(all( (ars>=bcs) == (arf>=bcf) ));
-   i=i+1 ;     % i=103
+   i=i+1;      % i=103
    res(i)= res(i)     +all(all( (ars<=bcs) == (arf<=bcf) ));
-   i=i+1 ;     % i=104
+   i=i+1;      % i=104
    res(i)= res(i)     +all(all( (ars==bcs) == (arf==bcf) ));
-   i=i+1 ;     % i=105
+   i=i+1;      % i=105
    res(i)= res(i)     +all(all( ars.*bcs == arf.*bcf ));
-   i=i+1 ;     % i=106
+   i=i+1;      % i=106
    res(i)= res(i)     +all(all( arf.*bcs == arf.*bcf ));
-   i=i+1 ;     % i=107
+   i=i+1;      % i=107
    res(i)= res(i)     +all(all( ars.*bcf == arf.*bcf ));
-   i=i+1 ;     % i=108
+   i=i+1;      % i=108
    res(i)= res(i)     +all(all( ars*ccs == arf*ccf ));
-   i=i+1 ;     % i=109
+   i=i+1;      % i=109
    res(i)= res(i)     +all(all( arf*ccs == arf*ccf ));
-   i=i+1 ;     % i=110
+   i=i+1;      % i=110
    res(i)= res(i)     +all(all( ars*ccf == arf*ccf ));
-   i=i+1 ;     % i=111
+   i=i+1;      % i=111

 %
 % complex sparse op sparse and  complex sparse op matrix
@@ -406,53 +406,58 @@
    # FIXME: this breaks if drs is 1x1
    rdif= abs(dcs\erf - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i=112
+   i=i+1;      % i=112

    rdif= abs(dcf\ers - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i=113
+   i=i+1;      % i=113

-# TODO: not avail yet
-#  rdif= abs(dcs\ers - df_ef) < abs(mag*df_ef);
-#  res(i)= res(i)     +all(all( rdif ));
-#  i=i+1 ;     % i=114
+   df_ef= dcf\erf;
+   rdif= abs(dcs\ers - df_ef) < abs(mag*df_ef);
+   res(i)= res(i)     +all(all( rdif ));
+   i=i+1;      % i=114
+
+   df_ef= drf\ecf;
+   rdif= abs(drs\ecs - df_ef) < abs(mag*df_ef);
+   res(i)= res(i)     +all(all( rdif ));
+   i=i+1;      % i=115

    res(i)= res(i)     +all(all( acs+brs == acf+brf ));
-   i=i+1 ;     % i=115
+   i=i+1;      % i=116
    res(i)= res(i)     +all(all( acf+brs == acf+brf ));
-   i=i+1 ;     % i=116
+   i=i+1;      % i=117
    res(i)= res(i)     +all(all( acs+brf == acf+brf ));
-   i=i+1 ;     % i=117
+   i=i+1;      % i=118
    res(i)= res(i)     +all(all( acs-brs == acf-brf ));
-   i=i+1 ;     % i=118
+   i=i+1;      % i=119
    res(i)= res(i)     +all(all( acf-brs == acf-brf ));
-   i=i+1 ;     % i=119
+   i=i+1;      % i=120
    res(i)= res(i)     +all(all( acs-brf == acf-brf ));
-   i=i+1 ;     % i=120
+   i=i+1;      % i=121
    res(i)= res(i)     +all(all( (acs>brs) == (acf>brf) ));
-   i=i+1 ;     % i=121
+   i=i+1;      % i=122
    res(i)= res(i)     +all(all( (acs<brs) == (acf<brf) ));
-   i=i+1 ;     % i=122
+   i=i+1;      % i=123
    res(i)= res(i)     +all(all( (acs!=brs) == (acf!=brf) ));
-   i=i+1 ;     % i=123
+   i=i+1;      % i=124
    res(i)= res(i)     +all(all( (acs>=brs) == (acf>=brf) ));
-   i=i+1 ;     % i=124
+   i=i+1;      % i=125
    res(i)= res(i)     +all(all( (acs<=brs) == (acf<=brf) ));
-   i=i+1 ;     % i=125
+   i=i+1;      % i=126
    res(i)= res(i)     +all(all( (acs==brs) == (acf==brf) ));
-   i=i+1 ;     % i=126
+   i=i+1;      % i=127
    res(i)= res(i)     +all(all( acs.*brs == acf.*brf ));
-   i=i+1 ;     % i=127
+   i=i+1;      % i=128
    res(i)= res(i)     +all(all( acf.*brs == acf.*brf ));
-   i=i+1 ;     % i=128
+   i=i+1;      % i=129
    res(i)= res(i)     +all(all( acs.*brf == acf.*brf ));
-   i=i+1 ;     % i=129
+   i=i+1;      % i=130
    res(i)= res(i)     +all(all( acs*crs == acf*crf ));
-   i=i+1 ;     % i=130
+   i=i+1;      % i=131
    res(i)= res(i)     +all(all( acf*crs == acf*crf ));
-   i=i+1 ;     % i=131
+   i=i+1;      % i=132
    res(i)= res(i)     +all(all( acs*crf == acf*crf ));
-   i=i+1 ;     % i=132
+   i=i+1;      % i=133

 %
 % complex sparse op complex sparse and  complex sparse op complex matrix
@@ -463,53 +468,52 @@
    # FIXME: this breaks if drs is 1x1
    rdif= abs(dcs\ecf - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i=133
+   i=i+1;      % i=134

    rdif= abs(dcf\ecs - df_ef) < abs(mag*df_ef);
    res(i)= res(i)     +all(all( rdif ));
-   i=i+1 ;     % i=134
+   i=i+1;      % i=135

-# TODO: not avail yet
-#  rdif= abs(dcs\ecs - df_ef) < abs(mag*df_ef);
-#  res(i)= res(i)     +all(all( rdif ));
-#  i=i+1 ;     % i=135
+   rdif= abs(dcs\ecs - df_ef) < abs(mag*df_ef);
+   res(i)= res(i)     +all(all( rdif ));
+   i=i+1;      % i=136

    res(i)= res(i)     +all(all( acs+bcs == acf+bcf ));
-   i=i+1 ;     % i=136
+   i=i+1;      % i=137
    res(i)= res(i)     +all(all( acf+bcs == acf+bcf ));
-   i=i+1 ;     % i=137
+   i=i+1;      % i=138
    res(i)= res(i)     +all(all( acs+bcf == acf+bcf ));
-   i=i+1 ;     % i=138
+   i=i+1;      % i=139
    res(i)= res(i)     +all(all( acs-bcs == acf-bcf ));
-   i=i+1 ;     % i=139
+   i=i+1;      % i=140
    res(i)= res(i)     +all(all( acf-bcs == acf-bcf ));
-   i=i+1 ;     % i=140
+   i=i+1;      % i=141
    res(i)= res(i)     +all(all( acs-bcf == acf-bcf ));
-   i=i+1 ;     % i=141
+   i=i+1;      % i=142
    res(i)= res(i)     +all(all( (acs>bcs) == (acf>bcf) ));
-   i=i+1 ;     % i=142
+   i=i+1;      % i=143
    res(i)= res(i)     +all(all( (acs<bcs) == (acf<bcf) ));
-   i=i+1 ;     % i=143
+   i=i+1;      % i=144
    res(i)= res(i)     +all(all( (acs!=bcs) == (acf!=bcf) ));
-   i=i+1 ;     % i=144
+   i=i+1;      % i=145
    res(i)= res(i)     +all(all( (acs>=bcs) == (acf>=bcf) ));
-   i=i+1 ;     % i=145
+   i=i+1;      % i=146
    res(i)= res(i)     +all(all( (acs<=bcs) == (acf<=bcf) ));
-   i=i+1 ;     % i=146
+   i=i+1;      % i=147
    res(i)= res(i)     +all(all( (acs==bcs) == (acf==bcf) ));
-   i=i+1 ;     % i=147
+   i=i+1;      % i=148
    res(i)= res(i)     +all(all( acs.*bcs == acf.*bcf ));
-   i=i+1 ;     % i=148
+   i=i+1;      % i=149
    res(i)= res(i)     +all(all( acf.*bcs == acf.*bcf ));
-   i=i+1 ;     % i=149
+   i=i+1;      % i=150
    res(i)= res(i)     +all(all( acs.*bcf == acf.*bcf ));
-   i=i+1 ;     % i=150
+   i=i+1;      % i=151
    res(i)= res(i)     +all(all( acs*ccs == acf*ccf ));
-   i=i+1 ;     % i=151
+   i=i+1;      % i=152
    res(i)= res(i)     +all(all( acf*ccs == acf*ccf ));
-   i=i+1 ;     % i=152
+   i=i+1;      % i=153
    res(i)= res(i)     +all(all( acs*ccf == acf*ccf ));
-   i=i+1 ;     % i=153
+   i=i+1;      % i=154

 %
 % sparse select operations
@@ -518,17 +522,17 @@
    r1= ars(sel1); r2=arf(sel1);
    res(i)= res(i)     +all( r1(:) == r2(:) );
 %  res(i)= res(i)     +all( ars(sel1) == arf(sel1 ));
-   i=i+1 ;     % i=154
+   i=i+1;      % i=155
    res(i)= res(i)     +all( ars(:) == arf(:));
-   i=i+1 ;     % i=155
+   i=i+1;      % i=156
    res(i)= res(i)     +all(all( ars(sely,selx) == arf(sely,selx) ));
-   i=i+1 ;     % i=156
+   i=i+1;      % i=157
    res(i)= res(i)     +all(all( ars( :  ,selx) == arf( :  ,selx) ));
-   i=i+1 ;     % i=157
+   i=i+1;      % i=158
    res(i)= res(i)     +all(all( ars(sely, :  ) == arf(sely, :  ) ));
-   i=i+1 ;     % i=158
+   i=i+1;      % i=159
    res(i)= res(i)     +all(all( ars(:,:) == arf(:,:) ));
-   i=i+1 ;     % i=159
+   i=i+1;      % i=160

 %
 % sparse select operations
@@ -552,7 +556,7 @@
    res(i)= res(i) + all( [  ...
                all(all( abs(Ls2*Us2 - Lf2*Uf2 )< mag )) ; ...
                       1 ] );
-   i=i+1 ;     % i=160
+   i=i+1;      % i=161

    if OCTAVE
       [Ls4,Us4,PsR,PsC] = splu( drs );
@@ -570,32 +574,13 @@
                   all(all( Us4 .* UU == Us4 )) ] );
    end

-   i=i+1 ;     % i=161
-
-%  [Ls4,Us4,PsR,PsC,p1,p2] = splu( drs );
-if 0 % test code for old spinv
-   [dsi,Ls4,Lt,iL,Us4,iUt,iU] = spinv( drs );
-   mag= 1e-10;
-   res(i)= res(i) + all( [ ...
-           all(all( abs( inv(Ls4) - iL ) <= mag*(1+abs(inv(Ls4))) )),
-           all(all( abs( inv(Us4) - iU ) <= mag*(1+abs(inv(Us4))) ))
-           ]);
-   if ~all(all( abs( inv(Ls4) - iL ) < mag*(1+abs(inv(Ls4))) ));
-      printf('%d:L size=%d\n', tries, size(iU,1));
-      keyboard
-   end
-   if ~all(all( abs( inv(Us4) - iU ) < mag*(1+abs(inv(Us4))) ));
-      printf('%d:U size=%d\n', tries, size(iU,1));
-           abs( inv(Us4) - iU ) <= mag*(1+abs(inv(iU)))
-      keyboard
-   end
-endif #0
+   i=i+1;      % i=162

    dsi = spinv( drs );
    mag= 1e-10;
    res(i)= res(i) + all(all( ...
            abs( inv(drf) - dsi ) <= mag*(1+abs(inv(drf))) ));
-   i=i+1 ;     % i=162
+   i=i+1;      % i=163

    if OCTAVE
       res(i)= res(i)    +all( spfind(ars) == find(arf) );
@@ -605,16 +590,68 @@
       [I,J,S]= find(ars);
       [N,M]  = size(ars);
    end
-   i=i+1 ;     % i=163
+   i=i+1;      % i=164

    asnew= sparse(I,J,S,N,M);
    res(i)= res(i)    +all( all( asnew == ars ));
-   i=i+1 ;     % i=164
+   i=i+1;      % i=165

 %
 % complex sparse LU and inverse
 %
-% TODO
+   mag = 1e-12;
+   [Lf2,Uf2]     =   lu( dcf );
+   [Lf4,Uf4,Pf4] =   lu( dcf );
+
+   if OCTAVE
+      [Ls2,Us2]     = splu( dcs );
+   else
+      [Ls2,Us2]     = lu( dcs );
+   end
+
+% LU decomp may be different but U must be Upper and LU==d
+   res(i)= res(i) + all( [  ...
+               all(all( abs(Ls2*Us2 - Lf2*Uf2 )< mag )) ; ...
+                      1 ] );
+   i=i+1;      % i=166
+
+   if OCTAVE
+      [Ls4,Us4,PsR,PsC] = splu( dcs );
+      res(i)= res(i) + ...
+            all([ all(all(abs( PsR'*Ls4*Us4*PsC  - Pf4'*Lf4*Uf4 )<mag)) ;
+                  all(all(abs( PsR'*Ls4*Us4*PsC  - dcf )< mag)) ;
+                  all(all( Ls4 .* LL == Ls4 )) ;
+                  all(all( Us4 .* UU == Us4 )) ] );
+   elseif 0
+      [Ls4,Us4,Ps4] = lu( dcs );
+      res(i)= res(i) + ...
+            all([ all(all(abs( Ps4'*Ls4*Us4 - Pf4'*Lf4*Uf4 )<mag)) ;
+                  all(all(abs( Ps4'*Ls4*Us4 - dcf )< mag)) ;
+                  all(all( Ls4 .* LL == Ls4 )) ;
+                  all(all( Us4 .* UU == Us4 )) ] );
+   end
+   i=i+1;      % i=167
+
+   dci = spinv( dcs );
+   mag= 1e-10;
+   res(i)= res(i) + all(all( ...
+           abs( inv(dcf) - dci ) <= mag*(1+abs(inv(dcf))) ));
+   i=i+1;      % i=168
+
+   if OCTAVE
+      res(i)= res(i)    +all( spfind(acs) == find(acf) );
+      [I,J,S,N,M]= spfind(acs);
+   else
+      res(i)= res(i)    +all( find(acs) == find(acf) );
+      [I,J,S]= find(acs);
+      [N,M]  = size(acs);
+   end
+   i=i+1;      % i=169
+
+   asnew= sparse(I,J,S,N,M);
+   res(i)= res(i)    +all( all( asnew == acs ));
+   i=i+1;      % i=170
+

 end

@@ -635,8 +672,14 @@

 %
 % $Log$
-% Revision 1.1  2001/10/10 19:54:49  pkienzle
-% Initial revision
+% Revision 1.2  2001/10/12 02:24:28  aadler
+% Mods to fix bugs
+% add support for all zero sparse matrices
+% add support fom complex sparse inverse
+%
+% Revision 1.8  2001/09/23 17:46:12  aadler
+% updated README
+% modified licence to GPL plus link to opensource programmes
 %
 % Revision 1.7  2001/04/08 20:14:34  aadler
 % test cases for complex sparse
--- a/main/sparse/sparse_full.cc	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/sparse_full.cc	Fri Oct 12 02:24:28 2001 +0000
@@ -16,6 +16,10 @@
 along with Octave; see the file COPYING.  If not, write to the Free
 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA.
+
+In addition to the terms of the GPL, you are permitted to link
+this program with any Open Source program, as defined by the
+Open Source Initiative (www.opensource.org)

 $Id$

@@ -101,6 +105,12 @@
            if (M(i,j)!=0) nnz++;
      retval(0)= (double) nnz;
   } else
+  if (args(0).type_name () == "scalar") {
+     retval(0)= args(0).scalar_value() != 0.0;
+  } else
+  if (args(0).type_name () == "complex scalar") {
+     retval(0)= args(0).complex_value() != 0.0;
+  } else
      gripe_wrong_type_arg ("nnz", args(0));

   return retval;
@@ -204,8 +214,14 @@

 /*
  * $Log$
- * Revision 1.1  2001/10/10 19:54:49  pkienzle
- * Initial revision
+ * Revision 1.2  2001/10/12 02:24:28  aadler
+ * Mods to fix bugs
+ * add support for all zero sparse matrices
+ * add support fom complex sparse inverse
+ *
+ * Revision 1.4  2001/09/23 17:46:12  aadler
+ * updated README
+ * modified licence to GPL plus link to opensource programmes
  *
  * Revision 1.3  2001/04/08 20:18:19  aadler
  * complex sparse support
--- a/main/sparse/sparse_inv.cc	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/sparse_inv.cc	Fri Oct 12 02:24:28 2001 +0000
@@ -17,6 +17,10 @@
 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA.

+In addition to the terms of the GPL, you are permitted to link
+this program with any Open Source program, as defined by the
+Open Source Initiative (www.opensource.org)
+
 $Id$

 */
@@ -55,348 +59,6 @@
 }
 #endif // NDEBUG

-//
-// Fix the row ordering problems that
-// LUExtract seems to cause
-// NOTE: we don't try to fix other structural errors
-//    in the generated matrices, but we bail out
-//    if we find any. This should work since I
-//    haven't seen any problems other than ordering
-//
-// NOTE: The right way to fix this, of course, is to
-//    track down the bug in the superlu codebase.
-
-// define a struct and qsort
-// comparison function to do the reordering
-typedef struct { unsigned int idx;
-                 double       val; } fixrow_sort;
-
-static inline int
-fixrow_comp( const void *i, const void *j)
-{
-   return  ((fixrow_sort *) i)->idx -
-           ((fixrow_sort *) j)->idx ;
-}
-
-void
-fix_row_order( SuperMatrix X )
-{
-   DEBUGMSG("fix_row_order");
-   DEFINE_SP_POINTERS_REAL( X )
-   int    nnz= NCFX->nnz;
-
-   for ( int i=0; i < Xnr ; i++) {
-      assert( cidxX[i] >= 0);
-      assert( cidxX[i] <  nnz);
-      assert( cidxX[i] <=  cidxX[i+1]);
-      int reorder=0;
-      for( int j= cidxX[i];
-               j< cidxX[i+1];
-               j++ ) {
-         assert( ridxX[j] >= 0);
-         assert( ridxX[j] <  Xnc);
-         assert( coefX[j] !=  0); // don't keep zero values
-         if (j> cidxX[i])
-            if ( ridxX[j-1] > ridxX[j] )
-               reorder=1;
-      } // for j
-      if(reorder) {
-         int snum= cidxX[i+1] - cidxX[i];
-         fixrow_sort arry[snum];
-         // copy to the sort struct
-         for( int k=0,
-                  j= cidxX[i];
-                  j< cidxX[i+1];
-                  j++ ) {
-            arry[k].idx= ridxX[j];
-            arry[k].val= coefX[j];
-            k++;
-         }
-         qsort( arry, snum, sizeof(fixrow_sort), fixrow_comp );
-         // copy back to the position
-         for( int k=0,
-                  j= cidxX[i];
-                  j< cidxX[i+1];
-                  j++ ) {
-            ridxX[j]= arry[k].idx;
-            coefX[j]= arry[k].val;
-            k++;
-         }
-      }
-   } // for i
-}
-
-//
-// This routine converts from the SuperNodal Matrices
-// L,U to the Comp Col format.
-//
-// It is modified from SuperLU/MATLAB/mexsuperlu.c
-//
-// It seems to produce badly formatted U. ie the
-//   row indeces are unsorted.
-// Need to call function to fix this.
-//
-
-static void
-LUextract(SuperMatrix *L, SuperMatrix *U, double *Lval, int *Lrow,
-          int *Lcol, double *Uval, int *Urow, int *Ucol, int *snnzL,
-          int *snnzU)
-{
-   DEBUGMSG("LUextract");
-   int         i, j, k;
-   int         upper;
-   int         fsupc, istart, nsupr;
-   int         lastl = 0, lastu = 0;
-   double      *SNptr;
-
-   SCformat * Lstore = (SCformat *) L->Store;
-   NCformat * Ustore = (NCformat *) U->Store;
-   Lcol[0] = 0;
-   Ucol[0] = 0;
-
-   /* for each supernode */
-   for (k = 0; k <= Lstore->nsuper; ++k) {
-
-       fsupc = L_FST_SUPC(k);
-       istart = L_SUB_START(fsupc);
-       nsupr = L_SUB_START(fsupc+1) - istart;
-       upper = 1;
-
-       /* for each column in the supernode */
-       for (j = fsupc; j < L_FST_SUPC(k+1); ++j) {
-           SNptr = &((double*)Lstore->nzval)[L_NZ_START(j)];
-
-           /* Extract U */
-           for (i = U_NZ_START(j); i < U_NZ_START(j+1); ++i) {
-               Uval[lastu] = ((double*)Ustore->nzval)[i];
-               if (Uval[lastu] != 0.0) Urow[lastu++] = U_SUB(i);
-           }
-           /* upper triangle in the supernode */
-           for (i = 0; i < upper; ++i) {
-               Uval[lastu] = SNptr[i];
-               if (Uval[lastu] != 0.0) Urow[lastu++] = L_SUB(istart+i);
-           }
-           Ucol[j+1] = lastu;
-
-           /* Extract L */
-           Lval[lastl] = 1.0; /* unit diagonal */
-           Lrow[lastl++] = L_SUB(istart + upper - 1);
-           for (i = upper; i < nsupr; ++i) {
-               Lval[lastl] = SNptr[i];
-                /* Matlab doesn't like explicit zero. */
-               if (Lval[lastl] != 0.0) Lrow[lastl++] = L_SUB(istart+i);
-           }
-           Lcol[j+1] = lastl;
-
-           ++upper;
-
-       } /* for j ... */
-
-   } /* for k ... */
-
-   *snnzL = lastl;
-   *snnzU = lastu;
-}
-
-static void
-sparse_LU_fact(SuperMatrix A,
-               SuperMatrix *LC,
-               SuperMatrix *UC,
-               int * perm_c,
-               int * perm_r,
-               int permc_spec )
-{
-   DEBUGMSG("sparse_LU_fact");
-   int m = A.nrow;
-   int n = A.ncol;
-   char   refact[1] = {'N'};
-   double thresh    = 1.0;     // diagonal pivoting threshold
-   double drop_tol  = 0.0;     // drop tolerance parameter
-   int    info;
-   int    panel_size = sp_ienv(1);
-   int    relax      = sp_ienv(2);
-   int    etree[n];
-   SuperMatrix Ac;
-   SuperMatrix L,U;
-
-   StatInit(panel_size, relax);
-
-   oct_sparse_do_permc( permc_spec, perm_c, A);
-   // Apply column perm to A and compute etree.
-   sp_preorder(refact, &A, perm_c, etree, &Ac);
-
-   dgstrf(refact, &Ac, thresh, drop_tol, relax, panel_size, etree,
-           NULL, 0, perm_r, perm_c, &L, &U, &info);
-   if ( info < 0 )
-      SP_FATAL_ERR ("LU factorization error");
-
-   int      snnzL, snnzU;
-
-   int      nnzL = ((SCformat*)L.Store)->nnz;
-   double * Lval = (double *) malloc( nnzL * sizeof(double) );
-   int    * Lrow = (   int *) malloc( nnzL * sizeof(   int) );
-   int    * Lcol = (   int *) malloc( (n+1)* sizeof(   int) );
-
-   int      nnzU = ((NCformat*)U.Store)->nnz;
-   double * Uval = (double *) malloc( nnzU * sizeof(double) );
-   int    * Urow = (   int *) malloc( nnzU * sizeof(   int) );
-   int    * Ucol = (   int *) malloc( (n+1)* sizeof(   int) );
-
-   LUextract(&L, &U, Lval, Lrow, Lcol, Uval, Urow, Ucol, &snnzL, &snnzU);
-   // we need to use the snnz values (squeezed vs. unsqueezed)
-   dCreate_CompCol_Matrix(LC, m, n, snnzL, Lval, Lrow, Lcol, NC, _D, GE);
-   dCreate_CompCol_Matrix(UC, m, n, snnzU, Uval, Urow, Ucol, NC, _D, GE);
-
-   fix_row_order( *LC );
-   fix_row_order( *UC );
-
-   oct_sparse_Destroy_SuperMatrix( L ) ;
-   oct_sparse_Destroy_SuperMatrix( U ) ;
-   oct_sparse_Destroy_SuperMatrix( Ac ) ;
-   StatFree();
-
-#if 0
-   printf("verify A\n");  oct_sparse_verify_supermatrix( A );
-   printf("verify LC\n"); oct_sparse_verify_supermatrix( *LC );
-   printf("verify UC\n"); oct_sparse_verify_supermatrix( *UC );
-#endif
-
-} // sparse_LU_fact(
-
-// calculate the inverse of an
-//  upper triangular sparse matrix
-//
-// iUt = inv(U)
-//  Note that the transpose is returned
-//
-// CODE:
-//
-//  note that the input matrix is accesses in
-//  row major order, and the output is accessed
-//  in column major. This is the reason that the
-//  output matrix is the transpose of the input
-//
-// I= eye( size(A) );
-// for n=1:N  # across
-//    for m= n+1:N # down
-//       v=0;
-//       for i= m-1:-1:n
-//          v=v- A(m,i)*I(i,n);
-//       end
-//       I(m,n)= v/A(m,m);
-//    end
-//    I(:,n)= I(:,n)/A(n,n);    <- for non unit norm
-// end
-SuperMatrix
-sp_inv_uppertriang( SuperMatrix U)
-{
-   DEBUGMSG("sp_inv_uppertriang");
-   DEFINE_SP_POINTERS_REAL( U )
-   int    nnzU= NCFU->nnz;
-   // we need to be careful here,
-   // U is uppertriangular, but we're treating
-   // it as though it is a lower triag matrix in NR form
-
-// if ( Unc != Unr) SP_FATAL_ERR("sp_inv_uppertriang: nr!=nc");
-   assert ( Unc == Unr );
-
-   SuperMatrix   X;
-   DECLARE_SP_POINTERS_REAL( X )
-
-   // estimate inverse nnz= input nnz
-   int    nnz = NCFU->nnz;
-   ridxX = intMalloc   (nnz);
-   coefX = doubleMalloc(nnz);
-   cidxX = intMalloc   (Unc+1);
-
-   int cx= 0;
-
-   // iterate accross columns of output matrix
-   for ( int n=0; n < Unr ; n++) {
-      // place the 1 in the identity position
-      int cx_colstart= cx;
-
-      cidxX[n]= cx;
-      check_bounds( cx, nnz, ridxX, coefX );
-      ridxX[cx]= n;
-      coefX[cx]= 1.0;
-      cx++;
-
-      // iterate accross columns of input matrix
-      for ( int m= n+1; m< Unr; m++) {
-         double v=0;
-         // iterate to calculate sum
-         int colXp= cidxX[n];
-         int colUp= cidxU[m];
-
-         int rpX, rpU;
-         do {
-            rpX= ridxX [ colXp ];
-            rpU= ridxU [ colUp ];
-
-#if 0
-            int scolXp=colXp;
-            int scolUp=colUp;
-#endif
-            if (rpX < rpU) {
-               colXp++;
-            } else
-            if (rpX > rpU) {
-               colUp++;
-            } else {
-               assert(rpX == rpU);
-               assert(rpX >= n);
-
-               v-= coefX[ colXp ]*coefU[ colUp ];
-               colXp++;
-               colUp++;
-            }
-#if 0
-            printf("n=%d, m=%d, X[%d]=%7.4f U[%d]=%7.4f cXp=%d cUp=%d v=%7.4f\n",
-                  n,m, rpX, coefX[ scolXp ], rpU, coefU[ scolUp ],
-               scolXp,scolUp, v);
-#endif
-
-         } while ((rpX<m) && (rpU<m) && (colXp<cx) && (colUp<nnzU));
-
-         // get A(m,m)
-         colUp= cidxU[m+1]-1;
-//       if (ridxU[ colUp ] != m) SP_FATAL_ERR("sp_inv_uppertriang: not Utriang input");
-         // assert fails if U is not upper triangular
-         assert (ridxU[ colUp ] == m );
-
-         double pivot= coefU[ colUp ];
-         if (pivot == 0) gripe_divide_by_zero ();
-
-         if (v!=0) {
-            check_bounds( cx, nnz, ridxX, coefX );
-            ridxX[cx]= m;
-            coefX[cx]= v / pivot;
-            cx++;
-         }
-      } // for m
-
-      // get A(m,m)
-      int colUp= cidxU[n+1]-1;
-//    if (ridxU[ colUp ] != n) SP_FATAL_ERR("sp_inv_uppertriang: not Utriang input");
-      // assert fails if U is not upper triangular
-      assert (ridxU[ colUp ] == n );
-      double pivot= coefU[ colUp ];
-      if (pivot == 0) gripe_divide_by_zero ();
-
-      if (pivot!=1.0)
-         for( int i= cx_colstart; i< cx; i++)
-            coefX[i]/= pivot;
-
-   } // for n
-   cidxX[Unr]= cx;
-
-   maybe_shrink( cx, nnz, ridxX, coefX );
-   dCreate_CompCol_Matrix(&X, Unr, Unc, cx,
-                          coefX, ridxX, cidxX, NC, _D, GE);
-   return X;
-}
-

 DEFUN_DLD (splu, args, nargout ,
   "[L,U,Prow, Pcol] = splu( a ,p);\n\
@@ -432,17 +94,17 @@
       return retval;
    }

-   if (args(0).type_name () == "sparse") {
+   if (args(0).type_name () == "sparse" ||
+       args(0).type_name () == "complex_sparse") {
+
       const octave_value& rep = args(0).get_rep ();
+      int m = args(0).rows(); //A.nrow;
+      int n = args(0).columns(); //A.ncol;
+      if (m != n)
+         SP_FATAL_ERR("splu: input matrix must be square");

-      SuperMatrix A = ((const octave_sparse&) rep) . super_matrix ();
-      SuperMatrix L, U;
-      int m = A.nrow;
-      int n = A.ncol;
-      if (m != n)
-         SP_FATAL_ERR("Input matrix must be square");
-
       int perm_c[n];
+      int perm_r[m];
       int permc_spec=3;
       if (nargin ==2) {

@@ -452,27 +114,25 @@
          permc_spec= -1; //permc is perselected
       }

-      int perm_r[m];
-      sparse_LU_fact( A, &L, &U, perm_c, perm_r, permc_spec);
-
-      octave_value LS= new octave_sparse( L );
-      octave_value US= new octave_sparse( U );
-
-// Build the permutation matrix
-//  remember to add 1 because assemble_sparse is 1 based
-      ColumnVector ridxPr(m), cidxPr(m), coefPr(m);
-      for (int i=0; i<m; i++) {
-         ridxPr(i)= (double) i + 1;
-         cidxPr(i)= (double) perm_r[i] + 1;
-         coefPr(i)= 1.0;
+      octave_value LS, US;
+
+      if (args(0).type_name () == "sparse" ) {
+         SuperMatrix A = ((const octave_sparse&) rep) . super_matrix ();
+         SuperMatrix L, U;
+         sparse_LU_fact( A, &L, &U, perm_c, perm_r, permc_spec);
+         LS= new octave_sparse( L );
+         US= new octave_sparse( U );
+      }
+      else {
+         SuperMatrix A = ((const octave_complex_sparse&) rep) . super_matrix ();
+         SuperMatrix L, U;
+         complex_sparse_LU_fact( A, &L, &U, perm_c, perm_r, permc_spec);
+         LS= new octave_complex_sparse( L );
+         US= new octave_complex_sparse( U );
       }

-      ColumnVector ridxPc(n), cidxPc(n), coefPc(n);
-      for (int i=0; i<m; i++) {
-         ridxPc(i)= (double) i + 1;
-         cidxPc(i)= (double) perm_c[i] + 1;
-         coefPc(i)= 1.0;
-      }
+      BUILD_PERM_VECTORS( ridxPr, cidxPr, coefPr, perm_r, n )
+      BUILD_PERM_VECTORS( ridxPc, cidxPc, coefPc, perm_c, m )

       if (nargout ==2 ) {
          octave_value PrT= new octave_sparse (
@@ -501,6 +161,24 @@
 }


+#ifdef VERBOSE
+// this is for debugging memory leaks
+DEFUN_DLD (spdump, args, , "dump sparse")
+{
+   octave_value_list retval;
+   if (args(0).type_name () == "sparse") {
+      const octave_value& rep = args(0).get_rep ();
+      SuperMatrix A = ((const octave_sparse&) rep) . super_matrix ();
+
+      printf("A->%08X<-\n", (unsigned int) ((NCformat *)A.Store)->rowind );
+      printf("A->%08X<-\n", (unsigned int) ((NCformat *)A.Store)->colptr );
+      printf("A->%08X<-\n", (unsigned int) ((NCformat *)A.Store)->nzval );
+      printf("A->%08X<-\n", (unsigned int) A.Store );
+   }
+   return retval;
+}
+#endif
+
 //
 // calculate the pieces of the sparse_inverse
 //
@@ -518,15 +196,16 @@
 //    oct_sparse_inverse( A, perm_c, perm_c_spec )
 //
 octave_value_list
-oct_sparse_inverse( SuperMatrix A,
+oct_sparse_inverse( const octave_sparse& Asp,
                     int* perm_c,
                     int permc_spec
       ) {
+   DEBUGMSG("oct_sparse_inverse (sparse)");
    octave_value_list retval;
+
+   SuperMatrix A= Asp.super_matrix();
    SuperMatrix L, U;

-   if (A.ncol != A.nrow) SP_FATAL_ERR("Input matrix must be square");
-
    int n= A.ncol;
    int m= A.nrow;
    assert(n == m);
@@ -534,21 +213,8 @@
    int perm_r[m];
    sparse_LU_fact( A, &L, &U, perm_c, perm_r, permc_spec);

-// Build the permutation matrix
-//  remember to add 1 because assemble_sparse is 1 based
-   ColumnVector ridxPr(m), cidxPr(m), coefPr(m);
-   for (int i=0; i<m; i++) {
-      ridxPr(i)= (double) i + 1;
-      cidxPr(i)= (double) perm_r[i] + 1;
-      coefPr(i)= 1.0;
-   }
-
-   ColumnVector ridxPc(n), cidxPc(n), coefPc(n);
-   for (int i=0; i<m; i++) {
-      ridxPc(i)= (double) i + 1;
-      cidxPc(i)= (double) perm_c[i] + 1;
-      coefPc(i)= 1.0;
-   }
+   BUILD_PERM_VECTORS( ridxPr, cidxPr, coefPr, perm_r, m )
+   BUILD_PERM_VECTORS( ridxPc, cidxPc, coefPc, perm_c, n )

    octave_value Pr = new octave_sparse (
          assemble_sparse( m, m, coefPr, cidxPr, ridxPr ) );
@@ -556,8 +222,8 @@
          assemble_sparse( n, n, coefPc, ridxPc, cidxPc ) );

    SuperMatrix Lt= oct_sparse_transpose( L );
-   SuperMatrix iL= sp_inv_uppertriang( Lt );
-   SuperMatrix iUt= sp_inv_uppertriang( U );
+   SuperMatrix iL= sparse_inv_uppertriang( Lt );
+   SuperMatrix iUt= sparse_inv_uppertriang( U );
    SuperMatrix iU= oct_sparse_transpose( iUt );

    oct_sparse_Destroy_SuperMatrix( L);
@@ -576,25 +242,52 @@
    return retval;
 }

-
-#ifdef VERBOSE
-// this is for debugging memory leaks
-DEFUN_DLD (spdump, args, , "dump sparse")
-{
+octave_value_list
+oct_sparse_inverse( const octave_complex_sparse& Asp,
+                    int* perm_c,
+                    int permc_spec
+      ) {
+   DEBUGMSG("oct_sparse_inverse (complex_sparse)");
    octave_value_list retval;
-   if (args(0).type_name () == "sparse") {
-      const octave_value& rep = args(0).get_rep ();
-      SuperMatrix A = ((const octave_sparse&) rep) . super_matrix ();
+
+   SuperMatrix A= Asp.super_matrix();
+   SuperMatrix L, U;
+
+   int n= A.ncol;
+   int m= A.nrow;
+   assert(n == m);
+
+   int perm_r[m];
+   complex_sparse_LU_fact( A, &L, &U, perm_c, perm_r, permc_spec);
+
+   BUILD_PERM_VECTORS( ridxPr, cidxPr, coefPr, perm_r, m )
+   BUILD_PERM_VECTORS( ridxPc, cidxPc, coefPc, perm_c, n )
+
+   octave_value Pr = new octave_sparse (
+         assemble_sparse( m, m, coefPr, cidxPr, ridxPr ) );
+   octave_value PcT= new octave_sparse (
+         assemble_sparse( n, n, coefPc, ridxPc, cidxPc ) );

-      printf("A->%08X<-\n", (unsigned int) ((NCformat *)A.Store)->rowind );
-      printf("A->%08X<-\n", (unsigned int) ((NCformat *)A.Store)->colptr );
-      printf("A->%08X<-\n", (unsigned int) ((NCformat *)A.Store)->nzval );
-      printf("A->%08X<-\n", (unsigned int) A.Store );
-   }
+   SuperMatrix Lt= oct_complex_sparse_transpose( L );
+   SuperMatrix iL= complex_sparse_inv_uppertriang( Lt );
+   SuperMatrix iUt= complex_sparse_inv_uppertriang( U );
+   SuperMatrix iU= oct_complex_sparse_transpose( iUt );
+
+   oct_sparse_Destroy_SuperMatrix( L);
+   oct_sparse_Destroy_SuperMatrix( Lt);
+   oct_sparse_Destroy_SuperMatrix( U);
+   oct_sparse_Destroy_SuperMatrix( iUt);
+
+   octave_value iLS =  new octave_complex_sparse( iL );
+   octave_value iUS =  new octave_complex_sparse( iU );
+
+   retval(0)= PcT;
+   retval(1)= iUS;
+   retval(2)= iLS;
+   retval(3)= Pr;
+
    return retval;
 }
-#endif
-

 DEFUN_DLD (spinv, args, nargout ,
   "[ainv] = spinv( a );\n\
@@ -623,31 +316,32 @@
       print_usage ("spinv");
       return retval;
    }
-
-   if (args(0).type_name () == "sparse") {
-      const octave_value& rep = args(0).get_rep ();
-
-      SuperMatrix A = ((const octave_sparse&) rep) . super_matrix ();

-      int n = A.ncol;
+   int n = args(0).columns();
+   if (n != args(0).rows()) SP_FATAL_ERR("spinv: Input matrix must be square");

-      int perm_c[n];
-      int permc_spec=3;
+   int perm_c[n];
+   int permc_spec=3;

-      if (nargin ==2) {
-         ColumnVector permcidx ( args(1).vector_value() );
-         for( int i= 0; i< n ; i++ )
-            perm_c[i]= (int) permcidx(i) - 1;
-         permc_spec= -1; //permc is preselected
-      }
-
-      octave_value_list Ai =
-         oct_sparse_inverse( A, perm_c, permc_spec );
-
+   if (nargin ==2) {
+      ColumnVector permcidx ( args(1).vector_value() );
+      for( int i= 0; i< n ; i++ )
+         perm_c[i]= (int) permcidx(i) - 1;
+      permc_spec= -1; //permc is preselected
+   }
+
+   if (args(0).type_name () == "sparse" ) {
+      const octave_sparse& A =
+           (const octave_sparse&) args(0).get_rep();
+      octave_value_list Ai = oct_sparse_inverse( A, perm_c, permc_spec );
       retval(0)= Ai(0) * Ai(1) * Ai(2) * Ai(3);
-
-   }
-   else
+   } else
+   if ( args(0).type_name () == "complex_sparse" ) {
+      const octave_complex_sparse& A =
+           (const octave_complex_sparse&) args(0).get_rep();
+      octave_value_list Ai = oct_sparse_inverse( A, perm_c, permc_spec );
+      retval(0)= Ai(0) * Ai(1) * Ai(2) * Ai(3);
+   } else
      gripe_wrong_type_arg ("spinv", args(0));

    return retval;
@@ -655,8 +349,14 @@

 /*
  * $Log$
- * Revision 1.1  2001/10/10 19:54:49  pkienzle
- * Initial revision
+ * Revision 1.2  2001/10/12 02:24:28  aadler
+ * Mods to fix bugs
+ * add support for all zero sparse matrices
+ * add support fom complex sparse inverse
+ *
+ * Revision 1.4  2001/09/23 17:46:12  aadler
+ * updated README
+ * modified licence to GPL plus link to opensource programmes
  *
  * Revision 1.3  2001/04/04 02:13:46  aadler
  * complete complex_sparse, templates, fix memory leaks
--- a/main/sparse/sparse_ops.cc	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/sparse_ops.cc	Fri Oct 12 02:24:28 2001 +0000
@@ -19,6 +19,10 @@
 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA.

+In addition to the terms of the GPL, you are permitted to link
+this program with any Open Source program, as defined by the
+Open Source Initiative (www.opensource.org)
+
 $Id$

 */
@@ -294,6 +298,13 @@
    sort_with_idx (ixp, ix, ixl);

    ColumnVector O( ixl );
+   // special case if X is all zeros
+   if (NCFX->nnz == 0 ) {
+      for (long k=0; k< ixl; k++)
+         O( k )= 0.0;
+      return O;
+   }
+
    long ip= -Xnr; // previous column position
    long jj=0,jl=0;
    for (long k=0; k< ixl; k++) {
@@ -366,10 +377,10 @@
          if ( ii<0 || ii>=Xnr)
             SP_FATAL_ERR("invalid matrix index (x index)");

-         while ( ridxX[jl] < ii && jl < jj ) jl++;
+         while ( jl < jj && ridxX[jl] < ii ) jl++;


-         if ( ridxX[jl] == ii && jl<jj )
+         if ( jl<jj && ridxX[jl] == ii )
             tcol[ kout ] = coefX[jl] ;
          else
             tcol[ kout ] = 0 ;
@@ -792,20 +803,20 @@
 DEFBINOP( s_s_ldiv, sparse, sparse) {
    DEBUGMSG("sparse - s_s_ldiv");
    CAST_BINOP_ARGS ( const octave_sparse&, const octave_sparse&);
-   SuperMatrix   S= v1.super_matrix();
+// SuperMatrix   S= v1.super_matrix();
    SuperMatrix   B= v2.super_matrix(); DEFINE_SP_POINTERS_REAL( B )
 // octave_value  B= new octave_sparse( v2.super_matrix() );
-   int n = S.ncol;
+   int n = v1.columns();
    int perm_c[n];
    int permc_spec=3;
-   octave_value_list Si= oct_sparse_inverse( S, perm_c, permc_spec );
+   octave_value_list Si= oct_sparse_inverse( v1, perm_c, permc_spec );
    octave_value inv= Si(0)*Si(1)*Si(2)*Si(3);
    const octave_value& rep = inv.get_rep ();
    SuperMatrix A = ((const octave_sparse&) rep) . super_matrix ();
    DEFINE_SP_POINTERS_REAL( A )
    SPARSE_SPARSE_MUL( double )
    return new octave_sparse ( X );
-} // f_s_ldiv
+} // s_s_ldiv


 // This is a wrapper around the SuperLU get_perm_c function
@@ -982,10 +993,174 @@
    INSTALL_BINOP (op_mul,      octave_sparse, octave_sparse, s_s_mul);
 }

+// functions for splu and inverse
+
+//
+// This routine converts from the SuperNodal Matrices
+// L,U to the Comp Col format.
+//
+// It is modified from SuperLU/MATLAB/mexsuperlu.c
+//
+// It seems to produce badly formatted U. ie the
+//   row indeces are unsorted.
+// Need to call function to fix this.
+//
+
+void
+LUextract(SuperMatrix *L, SuperMatrix *U, double *Lval, int *Lrow,
+          int *Lcol, double *Uval, int *Urow, int *Ucol, int *snnzL,
+          int *snnzU)
+{
+   DEBUGMSG("LUextract");
+   int         i, j, k;
+   int         upper;
+   int         fsupc, istart, nsupr;
+   int         lastl = 0, lastu = 0;
+   double      *SNptr;
+
+   SCformat * Lstore = (SCformat *) L->Store;
+   NCformat * Ustore = (NCformat *) U->Store;
+   Lcol[0] = 0;
+   Ucol[0] = 0;
+
+   /* for each supernode */
+   for (k = 0; k <= Lstore->nsuper; ++k) {
+
+       fsupc = L_FST_SUPC(k);
+       istart = L_SUB_START(fsupc);
+       nsupr = L_SUB_START(fsupc+1) - istart;
+       upper = 1;
+
+       /* for each column in the supernode */
+       for (j = fsupc; j < L_FST_SUPC(k+1); ++j) {
+           SNptr = &((double*)Lstore->nzval)[L_NZ_START(j)];
+
+           /* Extract U */
+           for (i = U_NZ_START(j); i < U_NZ_START(j+1); ++i) {
+               Uval[lastu] = ((double*)Ustore->nzval)[i];
+               if (Uval[lastu] != 0.0) Urow[lastu++] = U_SUB(i);
+           }
+           /* upper triangle in the supernode */
+           for (i = 0; i < upper; ++i) {
+               Uval[lastu] = SNptr[i];
+               if (Uval[lastu] != 0.0) Urow[lastu++] = L_SUB(istart+i);
+           }
+           Ucol[j+1] = lastu;
+
+           /* Extract L */
+           Lval[lastl] = 1.0; /* unit diagonal */
+           Lrow[lastl++] = L_SUB(istart + upper - 1);
+           for (i = upper; i < nsupr; ++i) {
+               Lval[lastl] = SNptr[i];
+               if (Lval[lastl] != 0.0) Lrow[lastl++] = L_SUB(istart+i);
+           }
+           Lcol[j+1] = lastl;
+
+           ++upper;
+
+       } /* for j ... */
+
+   } /* for k ... */
+
+   *snnzL = lastl;
+   *snnzU = lastu;
+}
+
+void
+sparse_LU_fact(SuperMatrix A,
+               SuperMatrix *LC,
+               SuperMatrix *UC,
+               int * perm_c,
+               int * perm_r,
+               int permc_spec )
+{
+   DEBUGMSG("sparse_LU_fact");
+   int m = A.nrow;
+   int n = A.ncol;
+   char   refact[1] = {'N'};
+   double thresh    = 1.0;     // diagonal pivoting threshold
+   double drop_tol  = 0.0;     // drop tolerance parameter
+   int    info;
+   int    panel_size = sp_ienv(1);
+   int    relax      = sp_ienv(2);
+   int    etree[n];
+   SuperMatrix Ac;
+   SuperMatrix L,U;
+
+   StatInit(panel_size, relax);
+
+   oct_sparse_do_permc( permc_spec, perm_c, A);
+   // Apply column perm to A and compute etree.
+   sp_preorder(refact, &A, perm_c, etree, &Ac);
+
+   dgstrf(refact, &Ac, thresh, drop_tol, relax, panel_size, etree,
+           NULL, 0, perm_r, perm_c, &L, &U, &info);
+   if ( info < 0 )
+      SP_FATAL_ERR ("LU factorization error");
+
+   int      snnzL, snnzU;
+
+   int      nnzL = ((SCformat*)L.Store)->nnz;
+   double * Lval = (double *) malloc( nnzL * sizeof(double) );
+   int    * Lrow = (   int *) malloc( nnzL * sizeof(   int) );
+   int    * Lcol = (   int *) malloc( (n+1)* sizeof(   int) );
+
+   int      nnzU = ((NCformat*)U.Store)->nnz;
+   double * Uval = (double *) malloc( nnzU * sizeof(double) );
+   int    * Urow = (   int *) malloc( nnzU * sizeof(   int) );
+   int    * Ucol = (   int *) malloc( (n+1)* sizeof(   int) );
+
+   LUextract(&L, &U, Lval, Lrow, Lcol, Uval, Urow, Ucol, &snnzL, &snnzU);
+   // we need to use the snnz values (squeezed vs. unsqueezed)
+   dCreate_CompCol_Matrix(LC, m, n, snnzL, Lval, Lrow, Lcol, NC, _D, GE);
+   dCreate_CompCol_Matrix(UC, m, n, snnzU, Uval, Urow, Ucol, NC, _D, GE);
+
+   fix_row_order( *LC );
+   fix_row_order( *UC );
+
+   oct_sparse_Destroy_SuperMatrix( L ) ;
+   oct_sparse_Destroy_SuperMatrix( U ) ;
+   oct_sparse_Destroy_SuperMatrix( Ac ) ;
+   StatFree();
+
+#if 0
+   printf("verify A\n");  oct_sparse_verify_supermatrix( A );
+   printf("verify LC\n"); oct_sparse_verify_supermatrix( *LC );
+   printf("verify UC\n"); oct_sparse_verify_supermatrix( *UC );
+#endif
+
+} // sparse_LU_fact(
+
+FIX_ROW_ORDER_SORT_FUNCTIONS( double )
+
+void
+fix_row_order( SuperMatrix X )
+{
+   DEBUGMSG("fix_row_order");
+   DEFINE_SP_POINTERS_REAL( X )
+   FIX_ROW_ORDER_FUNCTIONS
+}
+
+SuperMatrix
+sparse_inv_uppertriang( SuperMatrix U)
+{
+   DEBUGMSG("sparse_inv_uppertriang");
+   DEFINE_SP_POINTERS_REAL( U )
+   int    nnzU= NCFU->nnz;
+   SPARSE_INV_UPPERTRIANG( double )
+   return create_SuperMatrix( Unr,Unc,cx, coefX, ridxX, cidxX );
+}
+
 /*
  * $Log$
- * Revision 1.1  2001/10/10 19:54:49  pkienzle
- * Initial revision
+ * Revision 1.2  2001/10/12 02:24:28  aadler
+ * Mods to fix bugs
+ * add support for all zero sparse matrices
+ * add support fom complex sparse inverse
+ *
+ * Revision 1.8  2001/09/23 17:46:12  aadler
+ * updated README
+ * modified licence to GPL plus link to opensource programmes
  *
  * Revision 1.7  2001/04/04 02:13:46  aadler
  * complete complex_sparse, templates, fix memory leaks
--- a/main/sparse/sparse_ops.h	Fri Oct 12 02:16:47 2001 +0000
+++ b/main/sparse/sparse_ops.h	Fri Oct 12 02:24:28 2001 +0000
@@ -19,8 +19,10 @@
 $Id$

 $Log$
-Revision 1.1  2001/10/10 19:54:49  pkienzle
-Initial revision
+Revision 1.2  2001/10/12 02:24:28  aadler
+Mods to fix bugs
+add support for all zero sparse matrices
+add support fom complex sparse inverse

 Revision 1.2  2001/04/04 02:13:46  aadler
 complete complex_sparse, templates, fix memory leaks
@@ -56,44 +58,42 @@
          int  ja= cidxA[ i ]; \
          int  ja_max= cidxA[ i+1 ]; \
          bool ja_lt_max= ja < ja_max; \
-         int  ridxA_ja= ridxA[ ja ]; \
  \
          int  jb= cidxB[ i ]; \
          int  jb_max= cidxB[ i+1 ]; \
          bool jb_lt_max= jb < jb_max; \
-         int  ridxB_jb= ridxB[ jb ]; \
  \
          while( ja_lt_max || jb_lt_max ) { \
             if( ( !jb_lt_max ) || \
-                ((ridxA_ja < ridxB_jb) && ja_lt_max ) ) \
+                ( ja_lt_max && (ridxA[ja] < ridxB[jb]) ) ) \
             { \
                if (A_B_INTERACT) { \
-                  ridxX[ jx ] = ridxA_ja; \
+                  ridxX[ jx ] = ridxA[ja]; \
                   coefX[ jx ] = coefA[ ja ] _OP_ 0.0; \
                   jx++; \
                } \
-               ja++; ridxA_ja= ridxA[ ja ]; ja_lt_max= ja < ja_max; \
+               ja++; ja_lt_max= ja < ja_max; \
             } else \
             if( ( !ja_lt_max ) || \
-               ((ridxB_jb < ridxA_ja) && jb_lt_max ) ) \
+               ( jb_lt_max && (ridxB[jb] < ridxA[ja]) ) ) \
             { \
                if (A_B_INTERACT) { \
-                  ridxX[ jx ] = ridxB_jb; \
+                  ridxX[ jx ] = ridxB[jb]; \
                   coefX[ jx ] = 0.0 _OP_ coefB[ jb ]; \
                   jx++; \
                } \
-               jb++; ridxB_jb= ridxB[ jb ]; jb_lt_max= jb < jb_max; \
+               jb++; jb_lt_max= jb < jb_max; \
             } else \
             { \
-               assert( ridxA_ja == ridxB_jb ); \
+               assert( ridxA[ja] == ridxB[jb] ); \
                TYPX tmpval= coefA[ ja ] _OP_ coefB[ jb ]; \
                if (tmpval !=0.0) { \
                   coefX[ jx ] = tmpval; \
-                  ridxX[ jx ] = ridxA_ja; \
+                  ridxX[ jx ] = ridxA[ja]; \
                   jx++; \
                } \
-               ja++; ridxA_ja= ridxA[ ja ]; ja_lt_max= ja < ja_max; \
-               jb++; ridxB_jb= ridxB[ jb ]; jb_lt_max= jb < jb_max; \
+               ja++; ja_lt_max= ja < ja_max; \
+               jb++; jb_lt_max= jb < jb_max; \
             } \
          }  \
          cidxX[i+1] = jx;  \
@@ -132,6 +132,8 @@
    }

 // multiply type ops
+// TYPM is the output matrix type
+// TYPX is the sparse matrix type
 #define SPARSE_MATRIX_MUL( TYPM, TYPX) \
    TYPM X (Anr , Bnc);  \
    if (Anc != Bnr) { \
@@ -283,3 +285,192 @@
    maybe_shrink( jx, nnz, ridx, coef ); \
    SuperMatrix X= create_SuperMatrix( Anr, Anc, jx, coef, ridx, cidx );

+
+//
+// Next three functions
+//
+// Fix the row ordering problems that
+// LUExtract seems to cause
+// NOTE: we don't try to fix other structural errors
+//    in the generated matrices, but we bail out
+//    if we find any. This should work since I
+//    haven't seen any problems other than ordering
+//
+// NOTE: The right way to fix this, of course, is to
+//    track down the bug in the superlu codebase.
+
+// define a struct and qsort
+// comparison function to do the reordering
+#define FIX_ROW_ORDER_SORT_FUNCTIONS( TYPE ) \
+typedef struct { unsigned int idx; \
+                 TYPE         val; } fixrow_sort; \
+static inline int \
+fixrow_comp( const void *i, const void *j)  \
+{ \
+   return  ((fixrow_sort *) i)->idx - \
+           ((fixrow_sort *) j)->idx ; \
+}
+
+// this define ends function like
+// void
+// fix_row_order ## TYPE( SuperMatrix X )
+// {
+//    DEBUGMSG("fix_row_order");
+//    DEFINE_SP_POINTERS_REAL( X )
+//    FIX_ROW_ORDER_FUNCTIONS( TYPE )
+// }
+
+#define FIX_ROW_ORDER_FUNCTIONS \
+   int    nnz= NCFX->nnz; \
+   for ( int i=0; i < Xnr ; i++) { \
+      assert( cidxX[i] >= 0); \
+      assert( cidxX[i] <  nnz); \
+      assert( cidxX[i] <=  cidxX[i+1]); \
+      int reorder=0; \
+      for( int j= cidxX[i]; \
+               j< cidxX[i+1]; \
+               j++ ) { \
+         assert( ridxX[j] >= 0); \
+         assert( ridxX[j] <  Xnc); \
+         assert( coefX[j] !=  0); /* don't keep zero values */ \
+         if (j> cidxX[i]) \
+            if ( ridxX[j-1] > ridxX[j] ) \
+               reorder=1; \
+      } /* for j */ \
+      if(reorder) { \
+         int snum= cidxX[i+1] - cidxX[i]; \
+         fixrow_sort arry[snum]; \
+         /* copy to the sort struct */ \
+         for( int k=0, \
+                  j= cidxX[i]; \
+                  j< cidxX[i+1]; \
+                  j++ ) { \
+            arry[k].idx= ridxX[j]; \
+            arry[k].val= coefX[j]; \
+            k++; \
+         } \
+         qsort( arry, snum, sizeof(fixrow_sort), fixrow_comp ); \
+         /* copy back to the position */ \
+         for( int k=0, \
+                  j= cidxX[i]; \
+                  j< cidxX[i+1]; \
+                  j++ ) { \
+            ridxX[j]= arry[k].idx; \
+            coefX[j]= arry[k].val; \
+            k++; \
+         } \
+      } \
+   } // for i
+// endof FIX_ROW_ORDER_FUNCTIONS
+
+
+// calculate the inverse of an
+//  upper triangular sparse matrix
+//
+// iUt = inv(U)
+//  Note that the transpose is returned
+//
+// CODE:
+//
+//  note that the input matrix is accesses in
+//  row major order, and the output is accessed
+//  in column major. This is the reason that the
+//  output matrix is the transpose of the input
+//
+// I= eye( size(A) );
+// for n=1:N  # across
+//    for m= n+1:N # down
+//       v=0;
+//       for i= m-1:-1:n
+//          v=v- A(m,i)*I(i,n);
+//       end
+//       I(m,n)= v/A(m,m);
+//    end
+//    I(:,n)= I(:,n)/A(n,n);    <- for non unit norm
+// end
+//
+// we need to be careful here,
+// U is uppertriangular, but we're treating
+// it as though it is a lower triag matrix in NR form
+//
+// debug code
+//          printf("n=%d, m=%d, X[%d]=%7.4f U[%d]=%7.4f cXp=%d cUp=%d v=%7.4f\n",
+//                n,m, rpX, coefX[ scolXp ], rpU, coefU[ scolUp ],
+//             scolXp,scolUp, v);
+
+#define SPARSE_INV_UPPERTRIANG( TYPE ) \
+   assert ( Unc == Unr ); \
+   /* estimate inverse nnz= input nnz */ \
+   int     nnz = NCFU->nnz; \
+   int * ridxX = new int  [nnz]; \
+   TYPE* coefX = new TYPE [nnz]; \
+   int * cidxX = new int  [Unc+1]; \
+ \
+   int cx= 0; \
+ \
+   /* iterate accross columns of output matrix */ \
+   for ( int n=0; n < Unr ; n++) { \
+      /* place the 1 in the identity position */ \
+      int cx_colstart= cx; \
+ \
+      cidxX[n]= cx; \
+      check_bounds( cx, nnz, ridxX, coefX ); \
+      ridxX[cx]= n; \
+      coefX[cx]= 1.0; \
+      cx++; \
+ \
+      /* iterate accross columns of input matrix */ \
+      for ( int m= n+1; m< Unr; m++) { \
+         TYPE v=0; \
+         /* iterate to calculate sum */ \
+         int colXp= cidxX[n]; \
+         int colUp= cidxU[m]; \
+ \
+         int rpX, rpU; \
+         do { \
+            rpX= ridxX [ colXp ]; \
+            rpU= ridxU [ colUp ]; \
+ \
+            if (rpX < rpU) { \
+               colXp++; \
+            } else \
+            if (rpX > rpU) { \
+               colUp++; \
+            } else { \
+               assert(rpX == rpU); \
+               assert(rpX >= n); \
+ \
+               v-= coefX[ colXp ]*coefU[ colUp ]; \
+               colXp++;  \
+               colUp++; \
+            }  \
+ \
+         } while ((rpX<m) && (rpU<m) && (colXp<cx) && (colUp<nnzU)); \
+ \
+         /* get A(m,m) */ \
+         colUp= cidxU[m+1]-1; \
+         assert (ridxU[ colUp ] == m ); /* assert  U is upper triangular */ \
+ \
+         TYPE pivot= coefU[ colUp ]; \
+         if (pivot == 0) gripe_divide_by_zero (); \
+ \
+         if (v!=0) { \
+            check_bounds( cx, nnz, ridxX, coefX ); \
+            ridxX[cx]= m; \
+            coefX[cx]= v / pivot; \
+            cx++; \
+         } \
+      } /* for m */ \
+ \
+      /* get A(m,m) */ \
+      int colUp= cidxU[n+1]-1; \
+      assert (ridxU[ colUp ] == n ); /* assert U upper triangular */ \
+      TYPE pivot= coefU[ colUp ]; \
+      if (pivot == 0) gripe_divide_by_zero (); \
+ \
+      if (pivot!=1.0) \
+         for( int i= cx_colstart; i< cx; i++)  \
+            coefX[i]/= pivot; \
+   } /* for n */ \
+   cidxX[Unr]= cx; \
+   maybe_shrink( cx, nnz, ridxX, coefX );