Mercurial > octave

--- a/libinterp/corefcn/__ichol__.cc	Tue Mar 08 13:52:03 2016 -0800
+++ b/libinterp/corefcn/__ichol__.cc	Wed Mar 09 09:55:32 2016 -0800
@@ -52,7 +52,7 @@
 bool ichol_checkpivot_complex (Complex pivot)
 {
   if (pivot.imag () != 0)
-    error ("ichol: non-real pivot encountered.  The matrix must be hermitian.");
+    error ("ichol: non-real pivot encountered.  The matrix must be Hermitian.");
   else if (pivot.real () < 0)
     error ("ichol: negative pivot encountered");

@@ -71,10 +71,10 @@
           bool (*ichol_checkpivot) (T)>
 void ichol_0 (octave_matrix_t& sm, const std::string michol = "off")
 {
-
   const octave_idx_type n = sm.cols ();
   octave_idx_type j1, jend, j2, jrow, jjrow, j, jw, i, k, jj, r;
   T tl;
+
   char opt;
   enum {OFF, ON};
   if (michol == "on")
@@ -102,7 +102,7 @@
       dropsums[i] = 0;
     }

-  // Main loop
+  // Loop over all columns
   for (k = 0; k < n; k++)
     {
       j1 = cidx[k];
@@ -110,7 +110,7 @@
       for (j = j1; j < j2; j++)
         iw[ridx[j]] = j;

-      jrow = Llist [k];
+      jrow = Llist[k];
       // Iterate over each non-zero element in the actual row.
       while (jrow != -1)
         {
@@ -148,7 +148,8 @@
       if (opt == ON)
         data[j1] += dropsums[k];

-      if (ridx[j1] != k)
+      // Test for j1 == j2 must be first to avoid invalid ridx[j1] access
+      if (j1 == j2 || ridx[j1] != k)
         error ("ichol: encountered a pivot equal to 0");

       if (! ichol_checkpivot (data[j1]))
@@ -192,12 +193,12 @@
   // In ICHOL0 algorithm the zero-pattern of the input matrix is preserved
   // so it's structure does not change during the algorithm.  The same input
   // matrix is used to build the output matrix due to that fact.
-  octave_value_list param_list;
+  octave_value_list arg_list;
   if (! args(0).is_complex_type ())
     {
       SparseMatrix sm = args(0).sparse_matrix_value ();
-      param_list.append (sm);
-      sm = feval ("tril", param_list)(0).sparse_matrix_value ();
+      arg_list.append (sm);
+      sm = feval ("tril", arg_list)(0).sparse_matrix_value ();
       ichol_0 <SparseMatrix, double, ichol_mult_real,
                ichol_checkpivot_real> (sm, michol);

@@ -206,8 +207,8 @@
   else
     {
       SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
-      param_list.append (sm);
-      sm = feval ("tril", param_list)(0).sparse_complex_matrix_value ();
+      arg_list.append (sm);
+      sm = feval ("tril", arg_list)(0).sparse_complex_matrix_value ();
       ichol_0 <SparseComplexMatrix, Complex, ichol_mult_complex,
                ichol_checkpivot_complex> (sm, michol);

@@ -433,19 +434,19 @@
   if (nargin == 3)
     michol = args(2).string_value ();

-  octave_value_list param_list;
+  octave_value_list arg_list;
   if (! args(0).is_complex_type ())
     {
       Array <double> cols_norm;
       SparseMatrix L;
-      param_list.append (args(0).sparse_matrix_value ());
+      arg_list.append (args(0).sparse_matrix_value ());
       SparseMatrix sm_l =
-        feval ("tril", param_list)(0).sparse_matrix_value ();
-      param_list(0) = sm_l;
-      param_list(1) = 1;
-      param_list(2) = "cols";
-      cols_norm = feval ("norm", param_list)(0).vector_value ();
-      param_list.clear ();
+        feval ("tril", arg_list)(0).sparse_matrix_value ();
+      arg_list(0) = sm_l;
+      arg_list(1) = 1;
+      arg_list(2) = "cols";
+      cols_norm = feval ("norm", arg_list)(0).vector_value ();
+      arg_list.clear ();
       ichol_t <SparseMatrix,
                double, ichol_mult_real, ichol_checkpivot_real>
                (sm_l, L, cols_norm.fortran_vec (), droptol, michol);
@@ -456,14 +457,14 @@
     {
       Array <Complex> cols_norm;
       SparseComplexMatrix L;
-      param_list.append (args(0).sparse_complex_matrix_value ());
+      arg_list.append (args(0).sparse_complex_matrix_value ());
       SparseComplexMatrix sm_l =
-        feval ("tril", param_list)(0).sparse_complex_matrix_value ();
-      param_list(0) = sm_l;
-      param_list(1) = 1;
-      param_list(2) = "cols";
-      cols_norm = feval ("norm", param_list)(0).complex_vector_value ();
-      param_list.clear ();
+        feval ("tril", arg_list)(0).sparse_complex_matrix_value ();
+      arg_list(0) = sm_l;
+      arg_list(1) = 1;
+      arg_list(2) = "cols";
+      cols_norm = feval ("norm", arg_list)(0).complex_vector_value ();
+      arg_list.clear ();
       ichol_t <SparseComplexMatrix,
                Complex, ichol_mult_complex, ichol_checkpivot_complex>
                (sm_l, L, cols_norm.fortran_vec (),
--- a/libinterp/corefcn/__ilu__.cc	Tue Mar 08 13:52:03 2016 -0800
+++ b/libinterp/corefcn/__ilu__.cc	Wed Mar 09 09:55:32 2016 -0800
@@ -31,7 +31,7 @@
 #include "error.h"
 #include "parse.h"

-// That function implements the IKJ and JKI variants of Gaussian elimination to
+// This function implements the IKJ and JKI variants of Gaussian elimination to
 // perform the ILUTP decomposition.  The behavior is controlled by milu
 // parameter.  If milu = ['off'|'col'] the JKI version is performed taking
 // advantage of CCS format of the input matrix.  If milu = 'row' the input
@@ -40,11 +40,8 @@
 template <typename octave_matrix_t, typename T>
 void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
 {
-
   const octave_idx_type n = sm.cols ();
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, iw, n);
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, uptr, n);
-  octave_idx_type j1, j2, jrow, jw, i, k, jj;
+  octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
   T tl, r;

   enum {OFF, ROW, COL};
@@ -59,24 +56,35 @@
   else
     opt = OFF;

+  // Input matrix pointers
   octave_idx_type* cidx = sm.cidx ();
   octave_idx_type* ridx = sm.ridx ();
   T* data = sm.data ();
+
+  // Working arrays
+  OCTAVE_LOCAL_BUFFER (octave_idx_type, iw, n);
+  OCTAVE_LOCAL_BUFFER (octave_idx_type, uptr, n);
+
+  // Initialize working arrays
   for (i = 0; i < n; i++)
     iw[i] = -1;
+
+  // Loop over all columns
   for (k = 0; k < n; k++)
     {
       j1 = cidx[k];
-      j2 = cidx[k+1] - 1;
-      octave_idx_type j;
-      for (j = j1; j <= j2; j++)
-        {
-          iw[ridx[j]] = j;
-        }
+      j2 = cidx[k+1];
+
+      if (j1 == j2)
+        error ("ilu: A has a zero on the diagonal");
+
+      for (j = j1; j < j2; j++)
+        iw[ridx[j]] = j;
+
       r = 0;
       j = j1;
-      jrow = ridx[j];
-      while ((jrow < k) && (j <= j2))
+      jrow = ridx[j1];
+      while ((jrow < k) && (j < j2))
         {
           if (opt == ROW)
             {
@@ -116,9 +124,10 @@
       if (data[j] == T(0))
         error ("ilu: encountered a pivot equal to 0");

-      for (i = j1; i <= j2; i++)
+      for (i = j1; i < j2; i++)
         iw[ridx[i]] = -1;
     }
+
   if (opt == ROW)
     sm = sm.transpose ();
 }
@@ -141,35 +150,34 @@
   std::string milu;

   // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
-  // it's structure does not change during the algorithm.  The same input
+  // its structure does not change during the algorithm.  The same input
   // matrix is used to build the output matrix due to that fact.
-  octave_value_list param_list;
+  octave_value_list arg_list;
   if (! args(0).is_complex_type ())
     {
       SparseMatrix sm = args(0).sparse_matrix_value ();
       ilu_0 <SparseMatrix, double> (sm, milu);

-      param_list.append (sm);
-      retval(1) = feval ("triu", param_list)(0).sparse_matrix_value ();
+      arg_list.append (sm);
+      retval(1) = feval ("triu", arg_list)(0).sparse_matrix_value ();
       SparseMatrix eye =
-        feval ("speye", octave_value_list (octave_value (sm.cols ())))(0).sparse_matrix_value ();
-      param_list.append (-1);
+        feval ("speye", ovl (sm.cols ()))(0).sparse_matrix_value ();
+      arg_list.append (-1);
       retval(0) = eye +
-        feval ("tril", param_list)(0).sparse_matrix_value ();
+                  feval ("tril", arg_list)(0).sparse_matrix_value ();
     }
   else
     {
       SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
       ilu_0 <SparseComplexMatrix, Complex> (sm, milu);

-      param_list.append (sm);
-      retval(1) =
-        feval ("triu", param_list)(0).sparse_complex_matrix_value ();
+      arg_list.append (sm);
+      retval(1) = feval ("triu", arg_list)(0).sparse_complex_matrix_value ();
       SparseComplexMatrix eye =
-        feval ("speye", octave_value_list (octave_value (sm.cols ())))(0).sparse_complex_matrix_value ();
-      param_list.append (-1);
-      retval(0) =
-        eye + feval ("tril", param_list)(0).sparse_complex_matrix_value ();
+        feval ("speye", ovl (sm.cols ()))(0).sparse_complex_matrix_value ();
+      arg_list.append (-1);
+      retval(0) = eye +
+                  feval ("tril", arg_list)(0).sparse_complex_matrix_value ();
     }

   return retval;
@@ -181,7 +189,6 @@
                 T* rows_norm, const T droptol = 0,
                 const std::string milu = "off")
 {
-
   // Map the strings into chars for faster comparing inside loops
   char opt;
   enum {OFF, ROW, COL};
@@ -202,6 +209,7 @@
   max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
   max_len_l = sm_l.nnz ();
   max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
+
   // Extract pointers to the arrays for faster access inside loops
   octave_idx_type* cidx_in_u = sm_u.cidx ();
   octave_idx_type* ridx_in_u = sm_u.ridx ();
@@ -235,6 +243,7 @@

   T zero = T (0);

+  // Initialize working arrays
   cidx_u[0] = cidx_in_u[0];
   cidx_l[0] = cidx_in_l[0];
   for (i = 0; i < n; i++)
@@ -249,6 +258,7 @@
   cols_list_len = 0;
   rows_list_len = 0;

+  // Loop over all columns
   for (k = 0; k < n; k++)
     {
       // Load the working column and working row
@@ -434,6 +444,7 @@
   L = octave_matrix_t (n, n, total_len_l);
   U = octave_matrix_t (n, n, total_len_u);

+  // FIXME: Can these loops be replaced by std::copy?
   for (i = 0; i <= n; i++)
     L.cidx (i) = cidx_l[i];

@@ -478,56 +489,55 @@
   if (nargin == 3)
     milu = args(2).string_value ();

-  octave_value_list param_list;
+  octave_value_list arg_list;
   if (! args(0).is_complex_type ())
     {
       Array<double> cols_norm, rows_norm;
-      param_list.append (args(0).sparse_matrix_value ());
-      SparseMatrix sm_u = feval ("triu", param_list)(0).sparse_matrix_value ();
-      param_list.append (-1);
-      SparseMatrix sm_l = feval ("tril", param_list)(0).sparse_matrix_value ();
-      param_list(1) = "rows";
-      rows_norm = feval ("norm", param_list)(0).vector_value ();
-      param_list(1) = "cols";
-      cols_norm = feval ("norm", param_list)(0).vector_value ();
-      param_list.clear ();
+      arg_list.append (args(0).sparse_matrix_value ());
+      SparseMatrix sm_u = feval ("triu", arg_list)(0).sparse_matrix_value ();
+      arg_list.append (-1);
+      SparseMatrix sm_l = feval ("tril", arg_list)(0).sparse_matrix_value ();
+      arg_list(1) = "rows";
+      rows_norm = feval ("norm", arg_list)(0).vector_value ();
+      arg_list(1) = "cols";
+      cols_norm = feval ("norm", arg_list)(0).vector_value ();
+      arg_list.clear ();
       SparseMatrix U, L;
       ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
                                         cols_norm.fortran_vec (),
                                         rows_norm.fortran_vec (),
                                         droptol, milu);
-      param_list.append (octave_value (L.cols ()));
-      SparseMatrix eye =
-        feval ("speye", param_list)(0).sparse_matrix_value ();
+      arg_list.append (octave_value (L.cols ()));
+      SparseMatrix eye = feval ("speye", arg_list)(0).sparse_matrix_value ();
       return ovl (L + eye, U);
     }
   else
     {
       Array<Complex> cols_norm, rows_norm;
-      param_list.append (args(0).sparse_complex_matrix_value ());
+      arg_list.append (args(0).sparse_complex_matrix_value ());
       SparseComplexMatrix sm_u =
-        feval ("triu", param_list)(0).sparse_complex_matrix_value ();
-      param_list.append (-1);
+        feval ("triu", arg_list)(0).sparse_complex_matrix_value ();
+      arg_list.append (-1);
       SparseComplexMatrix sm_l =
-        feval ("tril", param_list)(0).sparse_complex_matrix_value ();
-      param_list(1) = "rows";
-      rows_norm = feval ("norm", param_list)(0).complex_vector_value ();
-      param_list(1) = "cols";
-      cols_norm = feval ("norm", param_list)(0).complex_vector_value ();
-      param_list.clear ();
+        feval ("tril", arg_list)(0).sparse_complex_matrix_value ();
+      arg_list(1) = "rows";
+      rows_norm = feval ("norm", arg_list)(0).complex_vector_value ();
+      arg_list(1) = "cols";
+      cols_norm = feval ("norm", arg_list)(0).complex_vector_value ();
+      arg_list.clear ();
       SparseComplexMatrix U, L;
       ilu_crout < SparseComplexMatrix, Complex >
                 (sm_l, sm_u, L, U, cols_norm.fortran_vec () ,
                  rows_norm.fortran_vec (), Complex (droptol), milu);

-      param_list.append (octave_value (L.cols ()));
+      arg_list.append (octave_value (L.cols ()));
       SparseComplexMatrix eye =
-        feval ("speye", param_list)(0).sparse_complex_matrix_value ();
+        feval ("speye", arg_list)(0).sparse_complex_matrix_value ();
       return ovl (L + eye, U);
     }
 }

-// That function implements the IKJ and JKI variants of gaussian elimination
+// This function implements the IKJ and JKI variants of gaussian elimination
 // to perform the ILUTP decomposition.  The behavior is controlled by milu
 // parameter.  If milu = ['off'|'col'] the JKI version is performed taking
 // advantage of CCS format of the input matrix.  Row pivoting is performed.
@@ -553,7 +563,7 @@

   const octave_idx_type n = sm.cols ();

-  // That is necessary for the JKI (milu = "row") variant.
+  // This is necessary for the JKI (milu = "row") variant.
   if (opt == ROW)
     sm = sm.transpose ();

@@ -572,12 +582,14 @@
   max_len_l = nnz_l;
   max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;

+  // Extract pointers to the arrays for faster access inside loops
   Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
   octave_idx_type* cidx_l = cidx_out_l.fortran_vec ();
   Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
   octave_idx_type* ridx_l = ridx_out_l.fortran_vec ();
   Array <T> data_out_l (dim_vector (max_len_l ,1));
   T* data_l = data_out_l.fortran_vec ();
+
   // Data for U
   Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
   octave_idx_type* cidx_u = cidx_out_u.fortran_vec ();
@@ -597,7 +609,7 @@
   octave_idx_type* perm = perm_vec.fortran_vec ();
   OCTAVE_LOCAL_BUFFER (octave_idx_type, uptr, n);

-
+  // Initialize working and permutation arrays
   cidx_l[0] = cidx_in[0];
   cidx_u[0] = cidx_in[0];
   for (i = 0; i < n; i++)
@@ -609,9 +621,9 @@
   total_len_u = 0;
   total_len_l = 0;

+  // Loop over all columns
   for (k = 0; k < n; k++)
     {
-
       for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
         {
           p_perm = iperm[ridx_in[j]];
@@ -662,8 +674,8 @@
                     }
                 }

-              // Drop element from the U part in IKJ and L part in JKI
-              // variant (milu = [col|off])
+              // Drop element from the U part in IKJ
+              // and L part in JKI variant (milu = [col|off])
               if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
                   && (w_data[jrow] != zero))
                 {
@@ -703,7 +715,7 @@
                   it2 = it;
                 }
             }
-          // If the pivot is not the diagonal element update all.
+          // If the pivot is not the diagonal element update all
           p_perm = iperm[max_ind];
           if (max_ind != perm[k])
             {
@@ -725,8 +737,8 @@

         }

-      // Drop elements in the L part in the IKJ and from the U part in the JKI
-      // version.
+      // Drop elements in the L part in the IKJ version,
+      // and from the U part in the JKI version.
       it = iw_l.begin ();
       while (it != iw_l.end ())
         {
@@ -755,8 +767,6 @@
           w_data[k] += total_sum;
         }

-
-
       // Check if the pivot is zero and if udiag is active.
       // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
       //       will not preserve the row sum for that column/row.
@@ -777,7 +787,6 @@
             w_data[p_perm] = w_data[p_perm] / w_data[k];
           }

-
       if ((max_len_u - total_len_u) < n)
         {
           max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
@@ -808,6 +817,7 @@
             }
           w_data[*it] = 0;
         }
+
       // Expand working vector into L.
       w_len_l = 0;
       for (it = iw_l.begin (); it != iw_l.end (); ++it)
@@ -823,6 +833,7 @@
         }
       total_len_u += w_len_u;
       total_len_l += w_len_l;
+
       // Check if there are too many elements to be indexed with
       // octave_idx_type type due to fill-in during the process.
       if (total_len_l < 0 || total_len_u < 0)
@@ -948,30 +959,30 @@
   if (nargin == 5)
     udiag = args(4).double_value ();

-  octave_value_list param_list;
+  octave_value_list arg_list;
   octave_idx_type nnz_u, nnz_l;
   if (! args(0).is_complex_type ())
     {
       Array <double> rc_norm;
       SparseMatrix sm = args(0).sparse_matrix_value ();
-      param_list.append (sm);
-      nnz_u =  (feval ("triu", param_list)(0).sparse_matrix_value ()).nnz ();
-      param_list.append (-1);
-      nnz_l =  (feval ("tril", param_list)(0).sparse_matrix_value ()).nnz ();
+      arg_list.append (sm);
+      nnz_u =  (feval ("triu", arg_list)(0).sparse_matrix_value ()).nnz ();
+      arg_list.append (-1);
+      nnz_l =  (feval ("tril", arg_list)(0).sparse_matrix_value ()).nnz ();
       if (milu == "row")
-        param_list (1) = "rows";
+        arg_list (1) = "rows";
       else
-        param_list (1) = "cols";
-      rc_norm = feval ("norm", param_list)(0).vector_value ();
-      param_list.clear ();
+        arg_list (1) = "cols";
+      rc_norm = feval ("norm", arg_list)(0).vector_value ();
+      arg_list.clear ();
       Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
       SparseMatrix U, L;
       ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
                                      rc_norm.fortran_vec (),
                                      perm, droptol, thresh, milu, udiag);
-      param_list.append (octave_value (L.cols ()));
+      arg_list.append (octave_value (L.cols ()));
       SparseMatrix eye =
-        feval ("speye", param_list)(0).sparse_matrix_value ();
+        feval ("speye", arg_list)(0).sparse_matrix_value ();
       if (milu == "row")
         {
           if (nargout == 3)
@@ -1002,27 +1013,27 @@
     {
       Array <Complex> rc_norm;
       SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
-      param_list.append (sm);
+      arg_list.append (sm);
       nnz_u =
-        feval ("triu", param_list)(0).sparse_complex_matrix_value ().nnz ();
-      param_list.append (-1);
+        feval ("triu", arg_list)(0).sparse_complex_matrix_value ().nnz ();
+      arg_list.append (-1);
       nnz_l =
-        feval ("tril", param_list)(0).sparse_complex_matrix_value ().nnz ();
+        feval ("tril", arg_list)(0).sparse_complex_matrix_value ().nnz ();
       if (milu == "row")
-        param_list(1) = "rows";
+        arg_list(1) = "rows";
       else
-        param_list(1) = "cols";
-      rc_norm = feval ("norm", param_list)(0).complex_vector_value ();
+        arg_list(1) = "cols";
+      rc_norm = feval ("norm", arg_list)(0).complex_vector_value ();
+      arg_list.clear ();
       Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
-      param_list.clear ();
       SparseComplexMatrix U, L;
       ilu_tp < SparseComplexMatrix, Complex>
               (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm,
                Complex (droptol), Complex (thresh), milu, udiag);

-      param_list.append (octave_value (L.cols ()));
+      arg_list.append (octave_value (L.cols ()));
       SparseComplexMatrix eye =
-        feval ("speye", param_list)(0).sparse_complex_matrix_value ();
+        feval ("speye", arg_list)(0).sparse_complex_matrix_value ();
       if (milu == "row")
         {
           if (nargout == 3)