# HG changeset patch # User Eduardo Ramos (edu159) # Date 1408361536 -3600 # Node ID df64071e538c06ced186c8e1237ef186a24b8a00 # Parent 168c0aa9bb05562bd401a60393c7f4c7fb675692 Removed ichol0.cc, icholt.cc, ilu0.cc, iluc.cc, ilutp.cc. Created __ichol__.cc and __ilu__.cc. Minor bugs fixed. * libinterp/dldfcn/__ichol__.cc: File created now contains __ichol0__ and __icholt__ functions. * libinterp/dldfcn/__ilu__.cc: File created now contains __ilu0__ __iluc__ and __ilutp__ functions. * scripts/sparse/ichol.m: Tests have been moved from .cc files to this one. * changed scripts/sparse/ilu.m: Tests have been moved from .cc files to this one. diff -r 168c0aa9bb05 -r df64071e538c libinterp/dldfcn/__ichol__.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libinterp/dldfcn/__ichol__.cc Mon Aug 18 12:32:16 2014 +0100 @@ -0,0 +1,552 @@ +/** + * Copyright (C) 2013 Kai T. Ohlhus + * Copyright (C) 2014 Eduardo Ramos Fernández + * + * This file is part of Octave. + * + * Octave is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation; either version 3 of the License, or (at your option) any later + * version. + * + * Octave is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * Octave; see the file COPYING. If not, see . + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "defun-dld.h" +#include "parse.h" + +// Secondary functions for complex and real case used +// in ichol algorithms. +Complex ichol_mult_complex (Complex a, Complex b) +{ + b.imag (-std::imag (b)); + return a * b; +} + +bool ichol_checkpivot_complex (Complex pivot) +{ + if (pivot.imag () != 0) + { + error ("ichol: Non-real pivot encountered. \ + The matrix must be hermitian."); + return false; + } + else if (pivot.real () < 0) + { + error ("ichol: Non-positive pivot encountered."); + return false; + } + return true; + +} + +bool ichol_checkpivot_real (double pivot) +{ + if (pivot < 0) + { + error ("ichol: Non-positive pivot encountered."); + return false; + } + return true; +} + +double ichol_mult_real (double a, double b) +{ + return a * b; +} + +template +void ichol_0 (octave_matrix_t& sm, const std::string michol = "off") +{ + + const octave_idx_type n = sm.cols (); + octave_idx_type j1, jend, j2, jrow, jjrow, j, jw, i, k, jj, Llist_len, r; + T tl; + char opt; + enum {OFF, ON}; + if (michol == "on") + opt = ON; + else + opt = OFF; + + // Input matrix pointers + octave_idx_type* cidx = sm.cidx (); + octave_idx_type* ridx = sm.ridx (); + T* data = sm.data (); + + // Working arrays + OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, Llist, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, iw, n); + OCTAVE_LOCAL_BUFFER (T, dropsums, n); + + // Initialize working arrays + for (i = 0; i < n; i++) + { + iw[i] = -1; + Llist[i] = -1; + Lfirst[i] = -1; + dropsums[i] = 0; + } + + // Main loop + for (k = 0; k < n; k++) + { + j1 = cidx[k]; + j2 = cidx[k+1]; + for (j = j1; j < j2; j++) + iw[ridx[j]] = j; + + jrow = Llist [k]; + // Iterate over each non-zero element in the actual row. + while (jrow != -1) + { + jjrow = Lfirst[jrow]; + jend = cidx[jrow+1]; + for (jj = jjrow; jj < jend; jj++) + { + r = ridx[jj]; + jw = iw[r]; + tl = ichol_mult (data[jj], data[jjrow]); + if (jw != -1) + data[jw] -= tl; + else + // Because the simetry of the matrix we know the drops + // in the column r are also in the column k. + if (opt == ON) + { + dropsums[r] -= tl; + dropsums[k] -= tl; + } + } + // Update the linked list and the first entry of the + // actual column. + if ((jjrow + 1) < jend) + { + Lfirst[jrow]++; + j = jrow; + jrow = Llist[jrow]; + Llist[j] = Llist[ridx[Lfirst[j]]]; + Llist[ridx[Lfirst[j]]] = j; + } + else + jrow = Llist[jrow]; + } + + if (opt == ON) + data[j1] += dropsums[k]; + + if (ridx[j1] != k) + { + error ("ichol: There is a pivot equal to zero."); + break; + } + + if (! ichol_checkpivot (data[j1])) + break; + + data[cidx[k]] = std::sqrt (data[j1]); + + // Update Llist and Lfirst with the k-column information. + // Also scale the column elements by the pivot and reset + // the working array iw. + if (k < (n - 1)) + { + iw[ridx[j1]] = -1; + for(i = j1 + 1; i < j2; i++) + { + iw[ridx[i]] = -1; + data[i] /= data[j1]; + } + Lfirst[k] = j1; + if ((Lfirst[k] + 1) < j2) + { + Lfirst[k]++; + jjrow = ridx[Lfirst[k]]; + Llist[k] = Llist[jjrow]; + Llist[jjrow] = k; + } + } + } +} + +DEFUN_DLD (__ichol0__, args, nargout, "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {@var{L} =} __ichol0__ (@var{A})\n\ +@deftypefnx {Loadable Function} {@var{L} =} __ichol0__ (@var{A}, @var{michol})\n\ +Undocumented internal function.\n\ +@end deftypefn") + +{ + octave_value_list retval; + + int nargin = args.length (); + std::string michol = "off"; + + + if (nargout > 1 || nargin < 1 || nargin > 2) + { + print_usage (); + return retval; + } + + if (args(0).is_scalar_type () || ! args(0).is_sparse_type ()) + error ("__ichol0__: 1. parameter must be a sparse square matrix."); + + if (args(0).is_empty ()) + { + retval(0) = octave_value (SparseMatrix ()); + return retval; + } + + + if (nargin == 2) + { + michol = args(1).string_value (); + if (error_state || ! (michol == "on" || michol == "off")) + error ("__ichol0__: 2. parameter must be 'on' or 'off' character string."); + } + + + if (!error_state) + { + // In ICHOL0 algorithm the zero-pattern of the input matrix is preserved so + // it's structure does not change during the algorithm. The same input + // matrix is used to build the output matrix due to that fact. + octave_value_list param_list; + if (!args(0).is_complex_type ()) + { + SparseMatrix sm = args(0).sparse_matrix_value (); + param_list.append (sm); + sm = feval ("tril", param_list)(0).sparse_matrix_value (); + ichol_0 (sm, michol); + if (! error_state) + retval(0) = octave_value (sm); + } + else + { + SparseComplexMatrix sm = args(0).sparse_complex_matrix_value (); + param_list.append (sm); + sm = feval ("tril", param_list)(0).sparse_complex_matrix_value (); + ichol_0 (sm, michol); + if (! error_state) + retval(0) = octave_value (sm); + } + + } + + return retval; +} + +template +void ichol_t (const octave_matrix_t& sm, octave_matrix_t& L, const T* cols_norm, + const T droptol, const std::string michol = "off") + +{ + + const octave_idx_type n = sm.cols (); + octave_idx_type j, jrow, jend, jjrow, jw, i, k, jj, Llist_len, total_len, w_len, + max_len, ind; + + char opt; + enum {OFF, ON}; + if (michol == "on") + opt = ON; + else + opt = OFF; + + // Input matrix pointers + octave_idx_type* cidx = sm.cidx (); + octave_idx_type* ridx = sm.ridx (); + T* data = sm.data (); + + // Output matrix data structures. Because it is not known the + // final zero pattern of the output matrix due to fill-in elements, + // an heuristic approach has been adopted for memory allocation. The + // size of ridx_out_l and data_out_l is incremented 10% of their actual + // size (nnz(A) in the beginning). If that amount is less than n, their + // size is just incremented in n elements. This way the number of + // reallocations decrease throughout the process, obtaining a good performance. + max_len = sm.nnz (); + max_len += (0.1 * max_len) > n ? 0.1 * max_len : n; + Array cidx_out_l (dim_vector (n + 1, 1)); + octave_idx_type* cidx_l = cidx_out_l.fortran_vec (); + Array ridx_out_l (dim_vector (max_len ,1)); + octave_idx_type* ridx_l = ridx_out_l.fortran_vec (); + Array data_out_l (dim_vector (max_len, 1)); + T* data_l = data_out_l.fortran_vec (); + + // Working arrays + OCTAVE_LOCAL_BUFFER (T, w_data, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, Llist, n); + OCTAVE_LOCAL_BUFFER (T, col_drops, n); + std::vector vec; + vec.resize (n); + + + T zero = T (0); + cidx_l[0] = cidx[0]; + for (i = 0; i < n; i++) + { + Llist[i] = -1; + Lfirst[i] = -1; + w_data[i] = 0; + col_drops[i] = zero; + vec[i] = 0; + } + + total_len = 0; + for (k = 0; k < n; k++) + { + ind = 0; + for (j = cidx[k]; j < cidx[k+1]; j++) + { + w_data[ridx[j]] = data[j]; + if (ridx[j] != k) + { + vec[ind] = ridx[j]; + ind++; + } + } + jrow = Llist[k]; + while (jrow != -1) + { + jjrow = Lfirst[jrow]; + jend = cidx_l[jrow+1]; + for (jj = jjrow; jj < jend; jj++) + { + j = ridx_l[jj]; + // If the element in the j position of the row is zero, + // then it will become non-zero, so we add it to the + // vector that keeps track of non-zero elements in the working row. + if (w_data[j] == zero) + { + vec[ind] = j; + ind++; + } + w_data[j] -= ichol_mult (data_l[jj], data_l[jjrow]); + + } + // Update the actual column first element and update the + // linked list of the jrow row. + if ((jjrow + 1) < jend) + { + Lfirst[jrow]++; + j = jrow; + jrow = Llist[jrow]; + Llist[j] = Llist[ridx_l[Lfirst[j]]]; + Llist[ridx_l[Lfirst[j]]] = j; + } + else + jrow = Llist[jrow]; + } + + // Resizing output arrays + if ((max_len - total_len) < n) + { + max_len += (0.1 * max_len) > n ? 0.1 * max_len : n; + data_out_l.resize (dim_vector (max_len, 1)); + data_l = data_out_l.fortran_vec (); + ridx_out_l.resize (dim_vector (max_len, 1)); + ridx_l = ridx_out_l.fortran_vec (); + } + + // The sorting of the non-zero elements of the working column can be + // handled in a couple of ways. The most efficient two I found, are + // keeping the elements in an ordered binary search tree dinamically + // or keep them unsorted in a vector and at the end of the outer + // iteration order them. The last approach exhibit lower execution + // times. + std::sort (vec.begin (), vec.begin () + ind); + + data_l[total_len] = w_data[k]; + ridx_l[total_len] = k; + w_len = 1; + + // Extract then non-zero elements of working column and drop the + // elements that are lower than droptol * cols_norm[k]. + for (i = 0; i < ind ; i++) + { + jrow = vec[i]; + if (w_data[jrow] != zero) + { + if (std::abs (w_data[jrow]) < (droptol * cols_norm[k])) + { + if (opt == ON) + { + col_drops[k] += w_data[jrow]; + col_drops[jrow] += w_data[jrow]; + } + } + else + { + data_l[total_len + w_len] = w_data[jrow]; + ridx_l[total_len + w_len] = jrow; + w_len++; + } + vec[i] = 0; + } + w_data[jrow] = zero; + } + + // Compensate column sums --> michol option + if (opt == ON) + data_l[total_len] += col_drops[k]; + + if (data_l[total_len] == zero) + { + error ("ichol: There is a pivot equal to zero."); + break; + } + else if (! ichol_checkpivot (data_l[total_len])) + break; + + // Once the elements are dropped and compensation of columns + // sums are done, scale the elements by the pivot. + data_l[total_len] = std::sqrt (data_l[total_len]); + for (jj = total_len + 1; jj < (total_len + w_len); jj++) + data_l[jj] /= data_l[total_len]; + total_len += w_len; + // Check if there are too many elements to be indexed with octave_idx_type + // type due to fill-in during the process. + if (total_len < 0) + { + error ("ichol: Integer overflow. Too many fill-in elements in L"); + break; + } + cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len; + + // Update Llist and Lfirst with the k-column information. + if (k < (n - 1)) + { + Lfirst[k] = cidx_l[k]; + if ((Lfirst[k] + 1) < cidx_l[k+1]) + { + Lfirst[k]++; + jjrow = ridx_l[Lfirst[k]]; + Llist[k] = Llist[jjrow]; + Llist[jjrow] = k; + } + } + + } + + if (! error_state) + { + // Build the output matrices + L = octave_matrix_t (n, n, total_len); + for (i = 0; i <= n; i++) + L.cidx (i) = cidx_l[i]; + for (i = 0; i < total_len; i++) + { + L.ridx (i) = ridx_l[i]; + L.data (i) = data_l[i]; + } + } + +} + +DEFUN_DLD (__icholt__, args, nargout, "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {@var{L} =} __icholt__ (@var{A})\n\ +@deftypefnx {Loadable Function} {@var{L} =} __icholt__ (@var{A}, @var{droptol})\n\ +@deftypefnx {Loadable Function} {@var{L} =} __icholt__ (@var{A}, @var{droptol}, @var{michol})\n\ +Undocumented internal function.\n\ +@end deftypefn") +{ + octave_value_list retval; + int nargin = args.length (); + // Default values of parameters + std::string michol = "off"; + double droptol = 0; + + + if (nargout > 1 || nargin < 1 || nargin > 3) + { + print_usage (); + return retval; + } + + if (args(0).is_scalar_type () || ! args(0).is_sparse_type ()) + error ("__icholt__: 1. parameter must be a sparse square matrix."); + + if (args(0).is_empty ()) + { + retval(0) = octave_value (SparseMatrix ()); + return retval; + } + + if (! error_state && (nargin >= 2)) + { + droptol = args(1).double_value (); + if (error_state || (droptol < 0) || ! args(1).is_real_scalar ()) + error ("__icholt__: 2. parameter must be a positive real scalar."); + } + + if (! error_state && (nargin == 3)) + { + michol = args(2).string_value (); + if (error_state || !(michol == "on" || michol == "off")) + error ("__icholt__: 3. parameter must be 'on' or 'off' character string."); + } + + if (!error_state) + { + octave_value_list param_list; + if (! args(0).is_complex_type ()) + { + Array cols_norm; + SparseMatrix L; + param_list.append (args(0).sparse_matrix_value ()); + SparseMatrix sm_l = feval ("tril", + param_list)(0).sparse_matrix_value (); + param_list(0) = sm_l; + param_list(1) = 1; + param_list(2) = "cols"; + cols_norm = feval ("norm", param_list)(0).vector_value (); + param_list.clear (); + ichol_t + (sm_l, L, cols_norm.fortran_vec (), droptol, michol); + if (! error_state) + retval(0) = octave_value (L); + } + else + { + Array cols_norm; + SparseComplexMatrix L; + param_list.append (args(0).sparse_complex_matrix_value ()); + SparseComplexMatrix sm_l = feval ("tril", + param_list)(0).sparse_complex_matrix_value (); + param_list(0) = sm_l; + param_list(1) = 1; + param_list(2) = "cols"; + cols_norm = feval ("norm", param_list)(0).complex_vector_value (); + param_list.clear (); + ichol_t + (sm_l, L, cols_norm.fortran_vec (), Complex (droptol), michol); + if (! error_state) + retval(0) = octave_value (L); + } + + } + + return retval; +} + +/* +## No test needed for internal helper function. +%!assert (1) +*/ diff -r 168c0aa9bb05 -r df64071e538c libinterp/dldfcn/__ilu__.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libinterp/dldfcn/__ilu__.cc Mon Aug 18 12:32:16 2014 +0100 @@ -0,0 +1,1164 @@ +/** + * Copyright (C) 2013 Kai T. Ohlhus + * Copyright (C) 2014 Eduardo Ramos Fernández + * + * This file is part of Octave. + * + * Octave is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation; either version 3 of the License, or (at your option) any later + * version. + * + * Octave is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * Octave; see the file COPYING. If not, see . + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "defun-dld.h" +#include "parse.h" + +// That function implements the IKJ and JKI variants of gaussian elimination to +// perform the ILUTP decomposition. The behaviour is controlled by milu +// parameter. If milu = ['off'|'col'] the JKI version is performed taking +// advantage of CCS format of the input matrix. If milu = 'row' the input matrix +// has to be transposed to obtain the equivalent CRS structure so we can work +// efficiently with rows. In this case IKJ version is used. +template +void ilu_0 (octave_matrix_t& sm, const std::string milu = "off") +{ + + const octave_idx_type n = sm.cols (); + OCTAVE_LOCAL_BUFFER (octave_idx_type, iw, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, uptr, n); + octave_idx_type j1, j2, jrow, jw, i, k, jj; + T tl, r; + + enum {OFF, ROW, COL}; + char opt; + if (milu == "row") + { + opt = ROW; + sm = sm.transpose (); + } + else if (milu == "col") + opt = COL; + else + opt = OFF; + + octave_idx_type* cidx = sm.cidx (); + octave_idx_type* ridx = sm.ridx (); + T* data = sm.data (); + for (i = 0; i < n; i++) + iw[i] = -1; + for (k = 0; k < n; k++) + { + j1 = cidx[k]; + j2 = cidx[k+1] - 1; + octave_idx_type j; + for (j = j1; j <= j2; j++) + { + iw[ridx[j]] = j; + } + r = 0; + j = j1; + jrow = ridx[j]; + while ((jrow < k) && (j <= j2)) + { + if (opt == ROW) + { + tl = data[j] / data[uptr[jrow]]; + data[j] = tl; + } + for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++) + { + jw = iw[ridx[jj]]; + if (jw != -1) + if (opt == ROW) + data[jw] -= tl * data[jj]; + else + data[jw] -= data[j] * data[jj]; + + else + // That is for the milu='row' + if (opt == ROW) + r += tl * data[jj]; + else if (opt == COL) + r += data[j] * data[jj]; + } + j++; + jrow = ridx[j]; + } + uptr[k] = j; + if(opt != OFF) + data[uptr[k]] -= r; + if (opt != ROW) + for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++) + data[jj] /= data[uptr[k]]; + if (k != jrow) + { + error ("ilu: Your input matrix has a zero in the diagonal."); + break; + } + + if (data[j] == T(0)) + { + error ("ilu: There is a pivot equal to zero."); + break; + } + for(i = j1; i <= j2; i++) + iw[ridx[i]] = -1; + } + if (opt == ROW) + sm = sm.transpose (); +} + +DEFUN_DLD (__ilu0__, args, nargout, "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {[@var{L}, @var{U}] =} __ilu0__ (@var{A})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @dots{})\n\ +Undocumented internal function.\n\ +@end deftypefn") +{ + octave_value_list retval; + + int nargin = args.length (); + std::string milu; + + + if (nargout > 2 || nargin < 1 || nargin > 2) + { + print_usage (); + return retval; + } + + if (args (0).is_empty ()) + { + retval(0) = octave_value (SparseMatrix()); + retval(1) = octave_value (SparseMatrix()); + return retval; + } + + if (args(0).is_scalar_type () || ! args(0).is_sparse_type ()) + error ("__ilu0__: 1. parameter must be a sparse square matrix."); + + if (nargin == 2) + { + milu = args(1).string_value (); + if (error_state || !(milu == "row" || milu == "col" || milu == "off")) + error ( + "__ilu0__: 2. parameter must be 'row', 'col' or 'off' character string."); + } + + + if (! error_state) + { + // In ILU0 algorithm the zero-pattern of the input matrix is preserved so + // it's structure does not change during the algorithm. The same input + // matrix is used to build the output matrix due to that fact. + octave_value_list param_list; + if (! args(0).is_complex_type ()) + { + SparseMatrix sm = args(0).sparse_matrix_value (); + ilu_0 (sm, milu); + if (!error_state) + { + param_list.append (sm); + retval(1) = octave_value ( + feval ("triu", param_list)(0).sparse_matrix_value ()); + SparseMatrix eye = feval ("speye", + octave_value_list ( + octave_value (sm.cols ())))(0).sparse_matrix_value (); + param_list.append (-1); + retval(0) = octave_value ( + eye + feval ("tril", param_list)(0).sparse_matrix_value ()); + + } + } + else + { + SparseComplexMatrix sm = args(0).sparse_complex_matrix_value (); + ilu_0 (sm, milu); + if (! error_state) + { + param_list.append (sm); + retval(1) = octave_value ( + feval ("triu", param_list)(0).sparse_complex_matrix_value ()); + SparseComplexMatrix eye = feval ("speye", + octave_value_list ( + octave_value (sm.cols ())))(0).sparse_complex_matrix_value (); + param_list.append (-1); + retval(0) = octave_value (eye + + feval ("tril", param_list)(0).sparse_complex_matrix_value ()); + } + } + + } + + return retval; +} + +template +void ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u, + octave_matrix_t& L, octave_matrix_t& U, T* cols_norm, + T* rows_norm, const T droptol = 0, + const std::string milu = "off") +{ + + // Map the strings into chars to faster comparation inside loops + char opt; + enum {OFF, ROW, COL}; + if (milu == "row") + opt = ROW; + else if (milu == "col") + opt = COL; + else + opt = OFF; + + octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u, + max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len; + + const octave_idx_type n = sm_u.cols (); + sm_u = sm_u.transpose (); + + max_len_u = sm_u.nnz (); + max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n; + max_len_l = sm_l.nnz (); + max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n; + // Extract pointers to the arrays for faster access inside loops + octave_idx_type* cidx_in_u = sm_u.cidx (); + octave_idx_type* ridx_in_u = sm_u.ridx (); + T* data_in_u = sm_u.data (); + octave_idx_type* cidx_in_l = sm_l.cidx (); + octave_idx_type* ridx_in_l = sm_l.ridx (); + T* data_in_l = sm_l.data (); + + // L output arrays + Array ridx_out_l (dim_vector (max_len_l, 1)); + octave_idx_type* ridx_l = ridx_out_l.fortran_vec (); + Array data_out_l (dim_vector (max_len_l, 1)); + T* data_l = data_out_l.fortran_vec (); + + // U output arrays + Array ridx_out_u (dim_vector (max_len_u, 1)); + octave_idx_type* ridx_u = ridx_out_u.fortran_vec (); + Array data_out_u (dim_vector (max_len_u, 1)); + T* data_u = data_out_u.fortran_vec (); + + // Working arrays + OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1); + OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1); + OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n); + OCTAVE_LOCAL_BUFFER (T, w_data_l, n); + OCTAVE_LOCAL_BUFFER (T, w_data_u, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, Ufirst, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n); + OCTAVE_LOCAL_BUFFER (T, cr_sum, n); + + T zero = T (0); + + cidx_u[0] = cidx_in_u[0]; + cidx_l[0] = cidx_in_l[0]; + for (i = 0; i < n; i++) + { + w_data_u[i] = zero; + w_data_l[i] = zero; + cr_sum[i] = zero; + } + + total_len_u = 0; + total_len_l = 0; + cols_list_len = 0; + rows_list_len = 0; + + for (k = 0; k < n; k++) + { + + // Load the working column and working row + for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++) + w_data_l[ridx_in_l[i]] = data_in_l[i]; + + for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++) + w_data_u[ridx_in_u[i]] = data_in_u[i]; + + // Update U working row + for (j = 0; j < rows_list_len; j++) + { + if ((Ufirst[rows_list[j]] != -1)) + for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++) + { + jrow = ridx_u[jj]; + w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]]; + } + } + // Update L working column + for (j = 0; j < cols_list_len; j++) + { + if (Lfirst[cols_list[j]] != -1) + for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++) + { + jrow = ridx_l[jj]; + w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]]; + } + } + + if ((max_len_u - total_len_u) < n) + { + max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n; + data_out_u.resize (dim_vector (max_len_u, 1)); + data_u = data_out_u.fortran_vec (); + ridx_out_u.resize (dim_vector (max_len_u, 1)); + ridx_u = ridx_out_u.fortran_vec (); + } + + if ((max_len_l - total_len_l) < n) + { + max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n; + data_out_l.resize (dim_vector (max_len_l, 1)); + data_l = data_out_l.fortran_vec (); + ridx_out_l.resize (dim_vector (max_len_l, 1)); + ridx_l = ridx_out_l.fortran_vec (); + } + + // Expand the working row into the U output data structures + w_len_l = 0; + data_u[total_len_u] = w_data_u[k]; + ridx_u[total_len_u] = k; + w_len_u = 1; + for (i = k + 1; i < n; i++) + { + if (w_data_u[i] != zero) + { + if (std::abs (w_data_u[i]) < (droptol * rows_norm[k])) + { + if (opt == ROW) + cr_sum[k] += w_data_u[i]; + else if (opt == COL) + cr_sum[i] += w_data_u[i]; + } + else + { + data_u[total_len_u + w_len_u] = w_data_u[i]; + ridx_u[total_len_u + w_len_u] = i; + w_len_u++; + } + } + + // Expand the working column into the L output data structures + if (w_data_l[i] != zero) + { + if (std::abs (w_data_l[i]) < (droptol * cols_norm[k])) + { + if (opt == COL) + cr_sum[k] += w_data_l[i]; + else if (opt == ROW) + cr_sum[i] += w_data_l[i]; + } + else + { + data_l[total_len_l + w_len_l] = w_data_l[i]; + ridx_l[total_len_l + w_len_l] = i; + w_len_l++; + } + } + w_data_u[i] = zero; + w_data_l[i] = zero; + } + + // Compensate row and column sums --> milu option + if (opt == COL || opt == ROW) + data_u[total_len_u] += cr_sum[k]; + + // Check if the pivot is zero + if (data_u[total_len_u] == zero) + { + error ("ilu: There is a pivot equal to zero."); + break; + } + + // Scale the elements in L by the pivot + for (i = total_len_l ; i < (total_len_l + w_len_l); i++) + data_l[i] /= data_u[total_len_u]; + + + total_len_u += w_len_u; + total_len_l += w_len_l; + // Check if there are too many elements to be indexed with octave_idx_type + // type due to fill-in during the process. + if (total_len_l < 0 || total_len_u < 0) + { + error ("ilu: Integer overflow. Too many fill-in elements in L or U"); + break; + } + cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u; + cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l; + + // The tricky part of the algorithm. The arrays pointing to the first + // working element of each column in the next iteration (Lfirst) or + // the first working element of each row (Ufirst) are updated. Also the + // arrays working as lists cols_list and rows_list are filled with indexes + // pointing to Ufirst and Lfirst respectively. + // TODO: Maybe the -1 indicating in Ufirst and Lfirst, that no elements + // have to be considered in a certain column or row in next iteration, can + // be removed. It feels safer to me using such an indicator. + if (k < (n - 1)) + { + if (w_len_u > 0) + Ufirst[k] = cidx_u[k]; + else + Ufirst[k] = -1; + if (w_len_l > 0) + Lfirst[k] = cidx_l[k]; + else + Lfirst[k] = -1; + cols_list_len = 0; + rows_list_len = 0; + for (i = 0; i <= k; i++) + { + if (Ufirst[i] != -1) + { + jj = ridx_u[Ufirst[i]]; + if (jj < (k + 1)) + { + if (Ufirst[i] < (cidx_u[i+1])) + { + Ufirst[i]++; + if (Ufirst[i] == cidx_u[i+1]) + Ufirst[i] = -1; + else + jj = ridx_u[Ufirst[i]]; + } + } + if (jj == (k + 1)) + { + cols_list[cols_list_len] = i; + cols_list_len++; + } + } + + if (Lfirst[i] != -1) + { + jj = ridx_l[Lfirst[i]]; + if (jj < (k + 1)) + if(Lfirst[i] < (cidx_l[i+1])) + { + Lfirst[i]++; + if (Lfirst[i] == cidx_l[i+1]) + Lfirst[i] = -1; + else + jj = ridx_l[Lfirst[i]]; + } + if (jj == (k + 1)) + { + rows_list[rows_list_len] = i; + rows_list_len++; + } + } + } + } + } + + if (! error_state) + { + // Build the output matrices + L = octave_matrix_t (n, n, total_len_l); + U = octave_matrix_t (n, n, total_len_u); + for (i = 0; i <= n; i++) + L.cidx (i) = cidx_l[i]; + for (i = 0; i < total_len_l; i++) + { + L.ridx (i) = ridx_l[i]; + L.data (i) = data_l[i]; + } + for (i = 0; i <= n; i++) + U.cidx (i) = cidx_u[i]; + for (i = 0; i < total_len_u; i++) + { + U.ridx (i) = ridx_u[i]; + U.data (i) = data_u[i]; + } + U = U.transpose (); + } +} + +DEFUN_DLD (__iluc__, args, nargout, "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {[@var{L}, @var{U}] =} __iluc__ (@var{A})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}) \n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}, @var{P}] =} __iluc__ (@var{A}, @dots{})\n\ +Undocumented internal function.\n\ +@end deftypefn") +{ + + octave_value_list retval; + int nargin = args.length (); + std::string milu = "off"; + double droptol = 0; + + if (nargout != 2 || nargin < 1 || nargin > 3) + { + print_usage (); + return retval; + } + + // To be matlab compatible + if (args(0).is_empty ()) + { + retval(0) = octave_value (SparseMatrix()); + retval(1) = octave_value (SparseMatrix()); + return retval; + } + + if (args(0).is_scalar_type () || ! args(0).is_sparse_type ()) + error ("__iluc__: 1. parameter must be a sparse square matrix."); + + if (! error_state && (nargin >= 2)) + { + droptol = args(1).double_value (); + if (error_state || (droptol < 0) || ! args(1).is_real_scalar ()) + error ("__iluc__: 2. parameter must be a positive real scalar."); + } + + if (! error_state && (nargin == 3)) + { + milu = args(2).string_value (); + if (error_state || !(milu == "row" || milu == "col" || milu == "off")) + error ("__iluc__: 3. parameter must be 'row', 'col' or 'off' character string."); + } + + if (! error_state) + { + octave_value_list param_list; + if (! args(0).is_complex_type ()) + { + Array cols_norm, rows_norm; + param_list.append (args(0).sparse_matrix_value ()); + SparseMatrix sm_u = feval ("triu", param_list)(0).sparse_matrix_value (); + param_list.append (-1); + SparseMatrix sm_l = feval ("tril", param_list)(0).sparse_matrix_value (); + param_list(1) = "rows"; + rows_norm = feval ("norm", param_list)(0).vector_value (); + param_list(1) = "cols"; + cols_norm = feval ("norm", param_list)(0).vector_value (); + param_list.clear (); + SparseMatrix U; + SparseMatrix L; + ilu_crout (sm_l, sm_u, L, U, cols_norm.fortran_vec (), + rows_norm.fortran_vec (), droptol, milu); + if (! error_state) + { + param_list.append (octave_value (L.cols ())); + SparseMatrix eye = feval ("speye", param_list)(0).sparse_matrix_value (); + retval(0) = octave_value (L + eye); + retval(1) = octave_value (U); + } + } + else + { + Array cols_norm, rows_norm; + param_list.append (args(0).sparse_complex_matrix_value ()); + SparseComplexMatrix sm_u = feval("triu", + param_list)(0).sparse_complex_matrix_value (); + param_list.append (-1); + SparseComplexMatrix sm_l = feval("tril", + param_list)(0).sparse_complex_matrix_value (); + param_list(1) = "rows"; + rows_norm = feval ("norm", param_list)(0).complex_vector_value (); + param_list(1) = "cols"; + cols_norm = feval ("norm", param_list)(0).complex_vector_value (); + param_list.clear (); + SparseComplexMatrix U; + SparseComplexMatrix L; + ilu_crout < SparseComplexMatrix, Complex > + (sm_l, sm_u, L, U, cols_norm.fortran_vec () , + rows_norm.fortran_vec (), Complex (droptol), milu); + if (! error_state) + { + param_list.append (octave_value (L.cols ())); + SparseComplexMatrix eye = feval ("speye", + param_list)(0).sparse_complex_matrix_value (); + retval(0) = octave_value (L + eye); + retval(1) = octave_value (U); + } + } + } + return retval; +} + +// That function implements the IKJ and JKI variants of gaussian elimination +// to perform the ILUTP decomposition. The behaviour is controlled by milu +// parameter. If milu = ['off'|'col'] the JKI version is performed taking +// advantage of CCS format of the input matrix. Row pivoting is performed. +// If milu = 'row' the input matrix has to be transposed to obtain the +// equivalent CRS structure so we can work efficiently with rows. In that +// case IKJ version is used and column pivoting is performed. + +template +void ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U, + octave_idx_type nnz_u, octave_idx_type nnz_l, T* cols_norm, + Array & perm_vec, const T droptol = T(0), + const T thresh = T(0), const std::string milu = "off", + const double udiag = 0) +{ + char opt; + enum {OFF, ROW, COL}; + if (milu == "row") + opt = ROW; + else if (milu == "col") + opt = COL; + else + opt = OFF; + + const octave_idx_type n = sm.cols (); + + // That is necessary for the JKI (milu = "row") variant. + if (opt == ROW) + sm = sm.transpose(); + + // Extract pointers to the arrays for faster access inside loops + octave_idx_type* cidx_in = sm.cidx (); + octave_idx_type* ridx_in = sm.ridx (); + T* data_in = sm.data (); + octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm, + max_ind, max_len_l, max_len_u; + T tl, aux, maximum; + + max_len_u = nnz_u; + max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n; + max_len_l = nnz_l; + max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n; + + Array cidx_out_l (dim_vector (n + 1, 1)); + octave_idx_type* cidx_l = cidx_out_l.fortran_vec (); + Array ridx_out_l (dim_vector (max_len_l, 1)); + octave_idx_type* ridx_l = ridx_out_l.fortran_vec (); + Array data_out_l (dim_vector (max_len_l ,1)); + T* data_l = data_out_l.fortran_vec (); + // Data for U + Array cidx_out_u (dim_vector (n + 1, 1)); + octave_idx_type* cidx_u = cidx_out_u.fortran_vec (); + Array ridx_out_u (dim_vector (max_len_u, 1)); + octave_idx_type* ridx_u = ridx_out_u.fortran_vec (); + Array data_out_u (dim_vector (max_len_u, 1)); + T* data_u = data_out_u.fortran_vec(); + + // Working arrays and permutation arrays + octave_idx_type w_len_u, w_len_l; + T total_sum, partial_col_sum, partial_row_sum; + std::set iw_l; + std::set iw_u; + std::set ::iterator it, it2; + OCTAVE_LOCAL_BUFFER (T, w_data, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, iperm, n); + octave_idx_type* perm = perm_vec.fortran_vec (); + OCTAVE_LOCAL_BUFFER (octave_idx_type, uptr, n); + + + T zero = T(0); + cidx_l[0] = cidx_in[0]; + cidx_u[0] = cidx_in[0]; + for (i = 0; i < n; i++) + { + w_data[i] = 0; + perm[i] = i; + iperm[i] = i; + } + total_len_u = 0; + total_len_l = 0; + + for (k = 0; k < n; k++) + { + + for (j = cidx_in[k]; j < cidx_in[k+1]; j++) + { + p_perm = iperm[ridx_in[j]]; + w_data[iperm[ridx_in[j]]] = data_in[j]; + if (p_perm > k) + iw_l.insert (ridx_in[j]); + else + iw_u.insert (p_perm); + } + + it = iw_u.begin (); + jrow = *it; + total_sum = zero; + while ((jrow < k) && (it != iw_u.end ())) + { + if (opt == COL) + partial_col_sum = w_data[jrow]; + if (w_data[jrow] != zero) + { + if (opt == ROW) + { + partial_row_sum = w_data[jrow]; + tl = w_data[jrow] / data_u[uptr[jrow]]; + } + for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++) + { + p_perm = iperm[ridx_l[jj]]; + aux = w_data[p_perm]; + if (opt == ROW) + { + w_data[p_perm] -= tl * data_l[jj]; + partial_row_sum += tl * data_l[jj]; + } + else + { + tl = data_l[jj] * w_data[jrow]; + w_data[p_perm] -= tl; + if (opt == COL) + partial_col_sum += tl; + } + + if ((aux == zero) && (w_data[p_perm] != zero)) + { + if (p_perm > k) + iw_l.insert (ridx_l[jj]); + else + iw_u.insert (p_perm); + } + } + + // Drop element from the U part in IKJ and L part in JKI + // variant (milu = [col|off]) + if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k])) + && (w_data[jrow] != zero)) + { + if (opt == COL) + total_sum += partial_col_sum; + else if (opt == ROW) + total_sum += partial_row_sum; + w_data[jrow] = zero; + it2 = it; + it++; + iw_u.erase (it2); + jrow = *it; + continue; + } + else + // This is the element scaled by the pivot in the actual iteration + if (opt == ROW) + w_data[jrow] = tl; + } + jrow = *(++it); + } + + // Search for the pivot and update iw_l and iw_u if the pivot is not the + // diagonal element + if ((thresh > zero) && (k < (n - 1))) + { + maximum = std::abs (w_data[k]) / thresh; + max_ind = perm[k]; + for (it = iw_l.begin (); it != iw_l.end (); ++it) + { + p_perm = iperm[*it]; + if (std::abs (w_data[p_perm]) > maximum) + { + maximum = std::abs (w_data[p_perm]); + max_ind = *it; + it2 = it; + } + } + // If the pivot is not the diagonal element update all. + p_perm = iperm[max_ind]; + if (max_ind != perm[k]) + { + iw_l.erase (it2); + if (w_data[k] != zero) + iw_l.insert (perm[k]); + else + iw_u.insert (k); + // Swap data and update permutation vectors + aux = w_data[k]; + iperm[perm[p_perm]] = k; + iperm[perm[k]] = p_perm; + c = perm[k]; + perm[k] = perm[p_perm]; + perm[p_perm] = c; + w_data[k] = w_data[p_perm]; + w_data[p_perm] = aux; + } + + } + + // Drop elements in the L part in the IKJ and from the U part in the JKI + // version. + it = iw_l.begin (); + while (it != iw_l.end ()) + { + p_perm = iperm[*it]; + if (droptol > zero) + if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k])) + { + if (opt != OFF) + total_sum += w_data[p_perm]; + w_data[p_perm] = zero; + it2 = it; + it++; + iw_l.erase (it2); + continue; + } + + it++; + } + + // If milu =[row|col] sumation is preserved --> Compensate diagonal element. + if (opt != OFF) + { + if ((total_sum > zero) && (w_data[k] == zero)) + iw_u.insert (k); + w_data[k] += total_sum; + } + + + + // Check if the pivot is zero and if udiag is active. + // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row] + // will not preserve the row sum for that column/row. + if (w_data[k] == zero) + { + if (udiag == 1) + { + w_data[k] = droptol; + iw_u.insert (k); + } + else + { + error ("ilu: There is a pivot equal to zero."); + break; + } + } + + // Scale the elements on the L part for IKJ version (milu = [col|off]) + if (opt != ROW) + for (it = iw_l.begin (); it != iw_l.end (); ++it) + { + p_perm = iperm[*it]; + w_data[p_perm] = w_data[p_perm] / w_data[k]; + } + + + if ((max_len_u - total_len_u) < n) + { + max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n; + data_out_u.resize (dim_vector (max_len_u, 1)); + data_u = data_out_u.fortran_vec (); + ridx_out_u.resize (dim_vector (max_len_u, 1)); + ridx_u = ridx_out_u.fortran_vec (); + } + + if ((max_len_l - total_len_l) < n) + { + max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n; + data_out_l.resize (dim_vector (max_len_l, 1)); + data_l = data_out_l.fortran_vec (); + ridx_out_l.resize (dim_vector (max_len_l, 1)); + ridx_l = ridx_out_l.fortran_vec (); + } + + // Expand working vector into U. + w_len_u = 0; + for (it = iw_u.begin (); it != iw_u.end (); ++it) + { + if (w_data[*it] != zero) + { + data_u[total_len_u + w_len_u] = w_data[*it]; + ridx_u[total_len_u + w_len_u] = *it; + w_len_u++; + } + w_data[*it] = 0; + } + // Expand working vector into L. + w_len_l = 0; + for (it = iw_l.begin (); it != iw_l.end (); ++it) + { + p_perm = iperm[*it]; + if (w_data[p_perm] != zero) + { + data_l[total_len_l + w_len_l] = w_data[p_perm]; + ridx_l[total_len_l + w_len_l] = *it; + w_len_l++; + } + w_data[p_perm] = 0; + } + total_len_u += w_len_u; + total_len_l += w_len_l; + // Check if there are too many elements to be indexed with octave_idx_type + // type due to fill-in during the process. + if (total_len_l < 0 || total_len_u < 0) + { + error ("ilu: Integer overflow. Too many fill-in elements in L or U"); + break; + } + if (opt == ROW) + uptr[k] = total_len_u - 1; + cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u; + cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l; + + iw_l.clear (); + iw_u.clear (); + } + + if (! error_state) + { + octave_matrix_t *L_ptr; + octave_matrix_t *U_ptr; + octave_matrix_t diag (n, n, n); + + // L and U are interchanged if milu = 'row'. It is a matter + // of nomenclature to re-use code with both IKJ and JKI + // versions of the algorithm. + if (opt == ROW) + { + L_ptr = &U; + U_ptr = &L; + L = octave_matrix_t (n, n, total_len_u - n); + U = octave_matrix_t (n, n, total_len_l); + } + else + { + L_ptr = &L; + U_ptr = &U; + L = octave_matrix_t (n, n, total_len_l); + U = octave_matrix_t (n, n, total_len_u); + } + + for (i = 0; i <= n; i++) + { + L_ptr->cidx (i) = cidx_l[i]; + U_ptr->cidx (i) = cidx_u[i]; + if (opt == ROW) + U_ptr->cidx (i) -= i; + } + + for (i = 0; i < n; i++) + { + if (opt == ROW) + diag.elem (i,i) = data_u[uptr[i]]; + j = cidx_l[i]; + + while (j < cidx_l[i+1]) + { + L_ptr->ridx (j) = ridx_l[j]; + L_ptr->data (j) = data_l[j]; + j++; + } + j = cidx_u[i]; + + while (j < cidx_u[i+1]) + { + c = j; + if (opt == ROW) + { + // The diagonal is removed from L if milu = 'row'. + // That is because is convenient to have it inside + // the L part to carry out the process. + if (ridx_u[j] == i) + { + j++; + continue; + } + else + c -= i; + } + U_ptr->data (c) = data_u[j]; + U_ptr->ridx (c) = ridx_u[j]; + j++; + } + } + + if (opt == ROW) + { + U = U.transpose (); + // The diagonal, conveniently permuted is added to U + U += diag.index (idx_vector::colon, perm_vec); + L = L.transpose (); + } + } +} + +DEFUN_DLD (__ilutp__, args, nargout, "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})\n\ +@deftypefnx {Loadable Function} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@var{A}, @dots{})\n\ +Undocumented internal function.\n\ +@end deftypefn") +{ + octave_value_list retval; + + int nargin = args.length (); + std::string milu = ""; + double droptol, thresh; + double udiag; + + + if (nargout < 2 || nargout > 3 || nargin < 1 || nargin > 5) + { + print_usage (); + return retval; + } + + // To be matlab compatible + if (args(0).is_empty ()) + { + retval(0) = octave_value (SparseMatrix ()); + retval(1) = octave_value (SparseMatrix ()); + if (nargout == 3) + retval(2) = octave_value (SparseMatrix ()); + return retval; + } + + if (args(0).is_scalar_type () || ! args(0).is_sparse_type () ) + error ("__ilutp__: 1. parameter must be a sparse square matrix."); + + if (! error_state && (nargin >= 2)) + { + droptol = args(1).double_value (); + if (error_state || (droptol < 0) || ! args(1).is_real_scalar ()) + error ("__ilutp__: 2. parameter must be a positive scalar."); + } + + if (! error_state && (nargin >= 3)) + { + thresh = args(2).double_value (); + if (error_state || ! args(2).is_real_scalar () || (thresh < 0) || thresh > 1) + error ("__ilutp__: 3. parameter must be a scalar 0 <= thresh <= 1."); + } + + if (! error_state && (nargin >= 4)) + { + milu = args(3).string_value (); + if (error_state || !(milu == "row" || milu == "col" || milu == "off")) + error ("__ilutp__: 4. parameter must be 'row', 'col' or 'off' character string."); + } + + if (! error_state && (nargin == 5)) + { + udiag = args(4).double_value (); + if (error_state || ! args(4).is_real_scalar () || ((udiag != 0) + && (udiag != 1))) + error ("__ilutp__: 5. parameter must be a scalar with value 1 or 0."); + } + + if (! error_state) + { + octave_value_list param_list; + octave_idx_type nnz_u, nnz_l; + if (! args(0).is_complex_type ()) + { + Array rc_norm; + SparseMatrix sm = args(0).sparse_matrix_value (); + param_list.append (sm); + nnz_u = (feval ("triu", param_list)(0).sparse_matrix_value ()).nnz (); + param_list.append (-1); + nnz_l = (feval ("tril", param_list)(0).sparse_matrix_value ()).nnz (); + if (milu == "row") + param_list (1) = "rows"; + else + param_list (1) = "cols"; + rc_norm = feval ("norm", param_list)(0).vector_value (); + param_list.clear (); + Array perm (dim_vector (sm.cols (), 1)); + SparseMatrix U; + SparseMatrix L; + ilu_tp (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), + perm, droptol, thresh, milu, udiag); + if (! error_state) + { + param_list.append (octave_value (L.cols ())); + SparseMatrix eye = feval ("speye", param_list)(0).sparse_matrix_value (); + if (milu == "row") + { + retval(0) = octave_value (L + eye); + if (nargout == 2) + retval(1) = octave_value (U); + else if (nargout == 3) + { + retval(1) = octave_value (U.index (idx_vector::colon, perm)); + retval(2) = octave_value (eye.index (idx_vector::colon, perm)); + } + } + else + { + retval(1) = octave_value (U); + if (nargout == 2) + retval(0) = octave_value (L + eye.index (perm, idx_vector::colon)); + else if (nargout == 3) + { + retval(0) = octave_value (L.index (perm, idx_vector::colon) + eye); + retval(2) = octave_value (eye.index (perm, idx_vector::colon)); + } + } + } + } + else + { + Array rc_norm; + SparseComplexMatrix sm = args(0).sparse_complex_matrix_value (); + param_list.append (sm); + nnz_u = feval ("triu", param_list)(0).sparse_complex_matrix_value ().nnz (); + param_list.append (-1); + nnz_l = feval ("tril", param_list)(0).sparse_complex_matrix_value ().nnz (); + if (milu == "row") + param_list (1) = "rows"; + else + param_list (1) = "cols"; + rc_norm = feval ("norm", param_list)(0).complex_vector_value (); + Array perm (dim_vector (sm.cols (), 1)); + param_list.clear (); + SparseComplexMatrix U; + SparseComplexMatrix L; + ilu_tp < SparseComplexMatrix, Complex> + (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm, + Complex (droptol), Complex (thresh), milu, udiag); + + if (! error_state) + { + param_list.append (octave_value (L.cols ())); + SparseComplexMatrix eye = feval ("speye", + param_list)(0).sparse_complex_matrix_value (); + if (milu == "row") + { + retval(0) = octave_value (L + eye); + if (nargout == 2) + retval(1) = octave_value (U); + else if (nargout == 3) + { + retval(1) = octave_value (U.index (idx_vector::colon, perm)); + retval(2) = octave_value (eye.index (idx_vector::colon, perm)); + } + } + else + { + retval(1) = octave_value (U); + if (nargout == 2) + retval(0) = octave_value (L + eye.index (perm, idx_vector::colon)) ; + else if (nargout == 3) + { + retval(0) = octave_value (L.index (perm, idx_vector::colon) + eye); + retval(2) = octave_value (eye.index (perm, idx_vector::colon)); + } + } + } + } + + } + + return retval; +} + +/* +## No test needed for internal helper function. +%!assert (1) +*/ diff -r 168c0aa9bb05 -r df64071e538c libinterp/dldfcn/ichol0.cc --- a/libinterp/dldfcn/ichol0.cc Tue Aug 12 15:58:18 2014 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,363 +0,0 @@ -/** - * Copyright (C) 2014 Eduardo Ramos Fernández - * - * This file is part of Octave. - * - * Octave is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free Software - * Foundation; either version 3 of the License, or (at your option) any later - * version. - * - * Octave is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along with - * Octave; see the file COPYING. If not, see . - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "defun-dld.h" -#include "parse.h" - -// Secondary functions specialiced for complex or real case used -// in icholt algorithms. -template < typename T > inline T -ichol_mult_complex (T a, T b) -{ - b.imag (-std::imag (b)); - return a * b; -} - -template < typename T > inline bool -ichol_checkpivot_complex (T pivot) -{ - if (pivot.imag () != 0) - { - error ("ichol0: Non-real pivot encountered. \ - The matrix must be hermitian."); - return false; - } - else if (pivot.real () < 0) - { - error ("ichol0: Non-positive pivot encountered."); - return false; - } - return true; - -} - -template < typename T > inline bool -ichol_checkpivot_real (T pivot) -{ - if (pivot < T(0)) - { - error ("ichol0: Non-positive pivot encountered."); - return false; - } - return true; -} - -template < typename T> inline T -ichol_mult_real (T a, T b) -{ - return a * b; -} - - -template -void ichol_0 (octave_matrix_t& sm, const std::string michol = "off") -{ - - const octave_idx_type n = sm.cols (); - octave_idx_type j1, jend, j2, jrow, jjrow, j, jw, i, k, jj, Llist_len, r; - - T tl; - char opt; - enum {OFF, ON}; - if (michol == "on") - opt = ON; - else - opt = OFF; - - // Input matrix pointers - octave_idx_type* cidx = sm.cidx (); - octave_idx_type* ridx = sm.ridx (); - T* data = sm.data (); - - // Working arrays - OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, Llist, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, iw, n); - OCTAVE_LOCAL_BUFFER (T, dropsums, n); - - // Initialise working arrays - for (i = 0; i < n; i++) - { - iw[i] = -1; - Llist[i] = -1; - Lfirst[i] = -1; - dropsums[i] = 0; - } - - // Main loop - for (k = 0; k < n; k++) - { - j1 = cidx[k]; - j2 = cidx[k+1]; - for (j = j1; j < j2; j++) - iw[ridx[j]] = j; - - jrow = Llist [k]; - // Iterate over each non-zero element in the actual row. - while (jrow != -1) - { - jjrow = Lfirst[jrow]; - jend = cidx[jrow+1]; - for (jj = jjrow; jj < jend; jj++) - { - r = ridx[jj]; - jw = iw[r]; - tl = ichol_mult (data[jj], data[jjrow]); - if (jw != -1) - data[jw] -= tl; - else - // Because of simetry of the matrix we know the drops - // in the column r are also in the column k. - if (opt == ON) - { - dropsums[r] -= tl; - dropsums[k] -= tl; - } - } - // Update the linked list and the first entry of the - // actual column. - if ((jjrow + 1) < jend) - { - Lfirst[jrow]++; - j = jrow; - jrow = Llist[jrow]; - Llist[j] = Llist[ridx[Lfirst[j]]]; - Llist[ridx[Lfirst[j]]] = j; - } - else - jrow = Llist[jrow]; - } - - if (opt == ON) - data[j1] += dropsums[k]; - - if (ridx[j1] != k) - { - error ("ichol0: There is a pivot equal to zero."); - break; - } - - if (!ichol_checkpivot (data[j1])) - break; - - data[cidx[k]] = std::sqrt (data[j1]); - - // Update Llist and Lfirst with the k-column information. - // Also scale the column elements by the pivot and reset - // the working array iw. - if (k < (n - 1)) - { - iw[ridx[j1]] = -1; - for(i = j1 + 1; i < j2; i++) - { - iw[ridx[i]] = -1; - data[i] /= data[j1]; - } - Lfirst[k] = j1; - if ((Lfirst[k] + 1) < j2) - { - Lfirst[k]++; - jjrow = ridx[Lfirst[k]]; - Llist[k] = Llist[jjrow]; - Llist[jjrow] = k; - } - } - } -} - -DEFUN_DLD (ichol0, args, nargout, "-*- texinfo -*-\n\ -@deftypefn {Loadable Function} {@var{L} =} ichol0 (@var{A}, @var{michol})\n\ -\n\ -Computes the no fill Incomplete Cholesky factorization [IC(0)] of A \ -which must be an square hermitian matrix in the complex case and a symmetric \ -positive definite matrix in the real one. \ -\n\ -\n\ -@code{@var{L} = ichol0 (@var{A}, @var{michol})} \ -computes the IC(0) of @var{A}, such that @code{@var{L} * @var{L}'} which \ -is an approximation of the square sparse hermitian matrix @var{A}. \ -The parameter @var{michol} decides whether the Modified IC(0) should \ -be performed. This compensates the main diagonal of \ -@var{L}, such that @code{@var{A} * @var{e} = @var{L} * @var{L}' * @var{e}} \ -with @code{@var{e} = ones (size (@var{A}, 2), 1))} holds. \n\ -\n\ -For more information about the algorithms themselves see:\n\ -\n\ -[1] Saad, Yousef. \"Preconditioning Techniques.\" Iterative Methods for Sparse Linear \ -Systems. PWS Publishing Company, 1996. \ -\n\ -@seealso{ichol, icholt, chol, ilu}\n\ -@end deftypefn") - -{ - octave_value_list retval; - - int nargin = args.length (); - std::string michol = "off"; - - - if (nargout > 1 || nargin < 1 || nargin > 2) - { - print_usage (); - return retval; - } - - if (args (0).is_scalar_type () || !args (0).is_sparse_type ()) - error ("ichol0: 1. parameter must be a sparse square matrix."); - - if (args (0).is_empty ()) - { - retval (0) = octave_value (SparseMatrix ()); - return retval; - } - - - if (nargin == 2) - { - michol = args (1).string_value (); - if (error_state || ! (michol == "on" || michol == "off")) - error ("ichol0: 2. parameter must be 'on' or 'off' character string."); - } - - - if (!error_state) - { - // In ICHOL0 algorithm the zero-pattern of the input matrix is preserved so - // it's structure does not change during the algorithm. The same input - // matrix is used to build the output matrix due to that fact. - octave_value_list param_list; - if (!args (0).is_complex_type ()) - { - SparseMatrix sm = args (0).sparse_matrix_value (); - param_list.append (sm); - sm = feval ("tril", param_list)(0).sparse_matrix_value (); - ichol_0 (sm, michol); - if (! error_state) - retval (0) = octave_value (sm); - } - else - { - SparseComplexMatrix sm = args (0).sparse_complex_matrix_value (); - param_list.append (sm); - sm = feval ("tril", param_list) (0).sparse_complex_matrix_value (); - ichol_0 (sm, michol); - if (! error_state) - retval (0) = octave_value (sm); - } - - } - - return retval; -} - -/* -%% Real matrices -%!shared A_1, A_2, A_3, A_4, A_5 -%! A_1 = [ 0.37, -0.05, -0.05, -0.07; -%! -0.05, 0.116, 0.0, -0.05; -%! -0.05, 0.0, 0.116, -0.05; -%! -0.07, -0.05, -0.05, 0.202]; -%! A_1 = sparse(A_1); -%! -%! A_2 = gallery ('poisson', 30); -%! -%! A_3 = gallery ('tridiag', 50); -%! -%! nx = 400; ny = 200; -%! hx = 1 / (nx + 1); hy = 1 / (ny + 1); -%! Dxx = spdiags ([ones(nx, 1), -2 * ones(nx, 1), ones(nx, 1)], [-1 0 1 ], nx, nx) / (hx ^ 2); -%! Dyy = spdiags ([ones(ny, 1), -2 * ones(ny, 1), ones(ny, 1)], [-1 0 1 ], ny, ny) / (hy ^ 2); -%! A_4 = -kron (Dxx, speye (ny)) - kron (speye (nx), Dyy); -%! A_4 = sparse (A_4); -%! -%! A_5 = [ 0.37, -0.05, -0.05, -0.07; -%! -0.05, 0.116, 0.0, -0.05 + 0.05i; -%! -0.05, 0.0, 0.116, -0.05; -%! -0.07, -0.05 - 0.05i, -0.05, 0.202]; -%! A_5 = sparse(A_5); -%! A_6 = [ 0.37, -0.05 - i, -0.05, -0.07; -%! -0.05 + i, 0.116, 0.0, -0.05; -%! -0.05, 0.0, 0.116, -0.05; -%! -0.07, -0.05, -0.05, 0.202]; -%! A_6 = sparse(A_6); -%! A_7 = A_5; -%! A_7(1) = 2i; -%! -%% Test input -%!test -%!error ichol0 ([]); -%!error ichol0 ([],[]); -%!error [~,~] = ichol0 ([],[],[]); -%!error [L] = ichol0 ([], 'foo'); -%!error [L] = ichol0 (A_1, [], 'off'); -%!error [L, E] = ichol0 (A_1, 'off'); -%!error ichol0 (sparse (0), 'off'); -%!error ichol0 ([], 'foo'); -%! -%!test -%! L = ichol0 (sparse (1), 'off'); -%! assert (L, sparse (1)); -%! L = ichol0 (sparse (2), 'off'); -%! assert (L, sparse (sqrt (2))); -%! L = ichol0 (sparse ([]), 'off'); -%! assert (L, sparse ([])); -%! -%!test -%! L = ichol0 (A_1, 'off'); -%! assert (norm (A_1 - L*L', 'fro') / norm (A_1, 'fro'), 1e-2, 1e-2); -%! L = ichol0 (A_1, 'on'); -%! assert (norm (A_1 - L*L', 'fro') / norm (A_1, 'fro'), 2e-2, 1e-2); -%! -%!test -%! L = ichol0 (A_2, 'off'); -%! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 1e-1, 1e-1) -%! L = ichol0 (A_2, 'on'); -%! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 2e-1, 1e-1) -%! -%!test -%! L = ichol0 (A_3, 'off'); -%! assert (norm (A_3 - L*L', 'fro') / norm (A_3, 'fro'), eps, eps); -%! L = ichol0 (A_3, 'on'); -%! assert (norm (A_3 - L*L', 'fro') / norm (A_3, 'fro'), eps, eps); -%! -%!test -%! L = ichol0 (A_4, 'off'); -%! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 1e-1, 1e-1); -%! L = ichol0 (A_4, 'on'); -%! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 1e-1, 1e-1); -%! -%% Complex matrices -%!test -%! L = ichol0 (A_5, 'off'); -%! assert (norm (A_5 - L*L', 'fro') / norm (A_5, 'fro'), 1e-2, 1e-2); -%! L = ichol0 (A_5, 'on'); -%! assert (norm (A_5 - L*L', 'fro') / norm (A_5, 'fro'), 2e-2, 1e-2); -%% Negative pivot -%!error ichol0 (A_6, 'off'); -%% Complex entry in the diagonal -%!error ichol0 (A_7, 'off'); - -*/ - - diff -r 168c0aa9bb05 -r df64071e538c libinterp/dldfcn/icholt.cc --- a/libinterp/dldfcn/icholt.cc Tue Aug 12 15:58:18 2014 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,480 +0,0 @@ -/** - * Copyright (C) 2014 Eduardo Ramos Fernández - * - * This file is part of Octave. - * - * Octave is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free Software - * Foundation; either version 3 of the License, or (at your option) any later - * version. - * - * Octave is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along with - * Octave; see the file COPYING. If not, see . - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "defun-dld.h" -#include "parse.h" - -// Secondary functions specialiced for complex or real case used -// in icholt algorithms. -template < typename T > inline T -ichol_mult_complex (T a, T b) -{ - b.imag (-std::imag (b)); - return a * b; -} - -template < typename T > inline bool -ichol_checkpivot_complex (T pivot) -{ - if (pivot.imag () != 0) - { - error ("icholt: Non-real pivot encountered. \ - The matrix must be hermitian"); - return false; - } - else if (pivot.real () < 0) - { - error ("icholt: Non-positive pivot encountered."); - return false; - } - return true; - -} - -template < typename T > inline bool -ichol_checkpivot_real (T pivot) -{ - if (pivot < T (0)) - { - error ("icholt: Non-positive pivot encountered."); - return false; - } - return true; -} - -template < typename T> inline T -ichol_mult_real (T a, T b) -{ - return a * b; -} - - -template -void ichol_t (const octave_matrix_t& sm, octave_matrix_t& L, const T* cols_norm, - const T droptol, const std::string michol = "off") - -{ - - const octave_idx_type n = sm.cols (); - octave_idx_type j, jrow, jend, jjrow, jw, i, k, jj, Llist_len, total_len, w_len, - max_len, ind; - - char opt; - enum {OFF, ON}; - if (michol == "on") - opt = ON; - else - opt = OFF; - - // Input matrix pointers - octave_idx_type* cidx = sm.cidx (); - octave_idx_type* ridx = sm.ridx (); - T* data = sm.data (); - - // Output matrix data structures. Because it is not known the - // final zero pattern of the output matrix due to fill-in elements, - // an heuristic approach has been adopted for memory allocation. The - // size of ridx_out_l and data_out_l is incremented 10% of their actual - // size (nnz(A) in the beginning). If that amount is less than n, their - // size is just incremented in n elements. This way the number of - // reallocations decrease throughout the process, obtaining a good performance. - max_len = sm.nnz (); - max_len += (0.1 * max_len) > n ? 0.1 * max_len : n; - Array cidx_out_l (dim_vector (n + 1,1)); - octave_idx_type* cidx_l = cidx_out_l.fortran_vec (); - Array ridx_out_l (dim_vector (max_len ,1)); - octave_idx_type* ridx_l = ridx_out_l.fortran_vec (); - Array data_out_l (dim_vector (max_len, 1)); - T* data_l = data_out_l.fortran_vec (); - - // Working arrays - OCTAVE_LOCAL_BUFFER (T, w_data, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, Llist, n); - OCTAVE_LOCAL_BUFFER (T, col_drops, n); - std::vector vec; - vec.resize (n); - - - T zero = T (0); - cidx_l[0] = cidx[0]; - for (i = 0; i < n; i++) - { - Llist[i] = -1; - Lfirst[i] = -1; - w_data[i] = 0; - col_drops[i] = zero; - vec[i] = 0; - } - - total_len = 0; - for (k = 0; k < n; k++) - { - ind = 0; - for (j = cidx[k]; j < cidx[k+1]; j++) - { - w_data[ridx[j]] = data[j]; - if (ridx[j] != k) - { - vec[ind] = ridx[j]; - ind++; - } - } - jrow = Llist[k]; - while (jrow != -1) - { - jjrow = Lfirst[jrow]; - jend = cidx_l[jrow+1]; - for (jj = jjrow; jj < jend; jj++) - { - j = ridx_l[jj]; - // If the element in the j position of the row is zero, - // then it will become non-zero, so we add it to the - // vector that keeps track of non-zero elements in the working row. - if (w_data[j] == zero) - { - vec[ind] = j; - ind++; - } - w_data[j] -= ichol_mult (data_l[jj], data_l[jjrow]); - - } - // Update the actual column first element and update the - // linked list of the jrow row. - if ((jjrow + 1) < jend) - { - Lfirst[jrow]++; - j = jrow; - jrow = Llist[jrow]; - Llist[j] = Llist[ridx_l[Lfirst[j]]]; - Llist[ridx_l[Lfirst[j]]] = j; - } - else - jrow = Llist[jrow]; - } - - // Resizing output arrays - if ((max_len - total_len) < n) - { - max_len += (0.1 * max_len) > n ? 0.1 * max_len : n; - data_out_l.resize (dim_vector (max_len, 1)); - data_l = data_out_l.fortran_vec (); - ridx_out_l.resize (dim_vector (max_len, 1)); - ridx_l = ridx_out_l.fortran_vec (); - } - - // The sorting of the non-zero elements of the working column can be - // handled in a couple of ways. The most efficient two I found, are - // keeping the elements in an ordered binary search tree dinamically - // or keep them unsorted in a vector and at the end of the outer - // iteration order them. The last approach exhibit lower execution - // times. - std::sort (vec.begin (), vec.begin () + ind); - - data_l[total_len] = w_data[k]; - ridx_l[total_len] = k; - w_len = 1; - - // Extract then non-zero elements of working column and drop the - // elements that are lower than droptol * cols_norm[k]. - for (i = 0; i < ind ; i++) - { - jrow = vec[i]; - if (w_data[jrow] != zero) - { - if (std::abs (w_data[jrow]) < (droptol * cols_norm[k])) - { - if (opt == ON) - { - col_drops[k] += w_data[jrow]; - col_drops[jrow] += w_data[jrow]; - } - } - else - { - data_l[total_len + w_len] = w_data[jrow]; - ridx_l[total_len + w_len] = jrow; - w_len++; - } - vec[i] = 0; - } - w_data[jrow] = zero; - } - - // Compensate column sums --> michol option - if (opt == ON) - data_l[total_len] += col_drops[k]; - - if (data_l[total_len] == zero) - { - error ("icholt: There is a pivot equal to zero."); - break; - } - else if (!ichol_checkpivot (data_l[total_len])) - break; - - // Once the elements are dropped and compensation of columns - // sums are done, scale the elements by the pivot. - data_l[total_len] = std::sqrt (data_l[total_len]); - for (jj = total_len + 1; jj < (total_len + w_len); jj++) - data_l[jj] /= data_l[total_len]; - total_len += w_len; - cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len; - - // Update Llist and Lfirst with the k-column information. - if (k < (n - 1)) - { - Lfirst[k] = cidx_l[k]; - if ((Lfirst[k] + 1) < cidx_l[k+1]) - { - Lfirst[k]++; - jjrow = ridx_l[Lfirst[k]]; - Llist[k] = Llist[jjrow]; - Llist[jjrow] = k; - } - } - - } - - if (! error_state) - { - // Build the output matrices - L = octave_matrix_t (n, n, total_len); - for (i = 0; i <= n; i++) - L.cidx (i) = cidx_l[i]; - for (i = 0; i < total_len; i++) - { - L.ridx (i) = ridx_l[i]; - L.data (i) = data_l[i]; - } - } - -} - -DEFUN_DLD (icholt, args, nargout, "-*- texinfo -*-\n\ -@deftypefn {Loadable Function} {@var{L} =} icholt (@var{A}, @var{droptol}, @var{michol})\n\ -\n\ -Computes the thresholded Incomplete Cholesky factorization [ICT] of A \ -which must be an square hermitian matrix in the complex case and a symmetric \ -positive definite matrix in the real one. \ -\n\ -@code{[@var{L}] = icholt (@var{A}, @var{droptol}, @var{michol})} \ -computes the ICT of @var{A}, such that @code{@var{L} * @var{L}'} is an \ -approximation of the square sparse hermitian matrix @var{A}. @var{droptol} is \ -a non-negative scalar used as a drop tolerance when performing ICT. Elements \ -which are smaller in magnitude than @code{@var{droptol} * norm(@var{A}(j:end, j), 1)} \ -, are dropped from the resulting factor @var{L}. The parameter @var{michol} \ -decides whether the Modified IC(0) should be performed. This compensates the \ -main diagonal of @var{L}, such that @code{@var{A} * @var{e} = @var{L} * @var{L}' \ - * @var{e}} with @code{@var{e} = ones (size (@var{A}, 2), 1))} holds. \n\ -\n\ -For more information about the algorithms themselves see:\n\ -\n\ -[1] Saad, Yousef. \"Preconditioning Techniques.\" Iterative Methods for Sparse Linear \ -Systems. PWS Publishing Company, 1996. \ -\n\ -\n\ -[2] Jones, Mark T. and Plassmann, Paul E.: An Improved Incomplete Cholesky \ -Factorization (1992). \ -\n\ -@seealso{ichol, ichol0, chol, ilu}\n\ -@end deftypefn") -{ - octave_value_list retval; - - int nargin = args.length (); - // Default values of parameters - std::string michol = "off"; - double droptol = 0; - - - if (nargout > 1 || nargin < 1 || nargin > 3) - { - print_usage (); - return retval; - } - - if (args (0).is_scalar_type () || !args (0).is_sparse_type ()) - error ("icholt: 1. parameter must be a sparse square matrix."); - - if (args (0).is_empty ()) - { - retval (0) = octave_value (SparseMatrix ()); - return retval; - } - - if (! error_state && (nargin >= 2)) - { - droptol = args (1).double_value (); - if (error_state || (droptol < 0) || ! args (1).is_real_scalar ()) - error ("icholt: 2. parameter must be a positive real scalar."); - } - - if (! error_state && (nargin == 3)) - { - michol = args (2).string_value (); - if (error_state || !(michol == "on" || michol == "off")) - error ("icholt: 3. parameter must be 'on' or 'off' character string."); - } - - if (!error_state) - { - octave_value_list param_list; - if (! args (0).is_complex_type ()) - { - Array cols_norm; - SparseMatrix L; - param_list.append (args (0).sparse_matrix_value ()); - SparseMatrix sm_l = feval ("tril", - param_list) (0).sparse_matrix_value (); - param_list (0) = sm_l; - param_list (1) = 1; - param_list (2) = "cols"; - cols_norm = feval ("norm", param_list) (0).vector_value (); - param_list.clear (); - ichol_t - (sm_l, L, cols_norm.fortran_vec (), droptol, michol); - if (! error_state) - retval (0) = octave_value (L); - } - else - { - Array cols_norm; - SparseComplexMatrix L; - param_list.append (args (0).sparse_complex_matrix_value ()); - SparseComplexMatrix sm_l = feval ("tril", - param_list) (0).sparse_complex_matrix_value (); - param_list (0) = sm_l; - param_list (1) = 1; - param_list (2) = "cols"; - cols_norm = feval ("norm", param_list) (0).complex_vector_value (); - param_list.clear (); - ichol_t - (sm_l, L, cols_norm.fortran_vec (), Complex (droptol), michol); - if (! error_state) - retval (0) = octave_value (L); - } - - } - - return retval; -} - -/* -%% Real matrices -%!shared A_1, A_2, A_3, A_4, A_5 -%! A_1 = [ 0.37, -0.05, -0.05, -0.07; -%! -0.05, 0.116, 0.0, -0.05; -%! -0.05, 0.0, 0.116, -0.05; -%! -0.07, -0.05, -0.05, 0.202]; -%! A_1 = sparse(A_1); -%! -%! A_2 = gallery ('poisson', 30); -%! -%! A_3 = gallery ('tridiag', 50); -%! -%! nx = 400; ny = 200; -%! hx = 1 / (nx + 1); hy = 1 / (ny + 1); -%! Dxx = spdiags ([ones(nx, 1), -2 * ones(nx, 1), ones(nx, 1)], [-1 0 1 ], nx, nx) / (hx ^ 2); -%! Dyy = spdiags ([ones(ny, 1), -2 * ones(ny, 1), ones(ny, 1)], [-1 0 1 ], ny, ny) / (hy ^ 2); -%! A_4 = -kron (Dxx, speye (ny)) - kron (speye (nx), Dyy); -%! A_4 = sparse (A_4); -%! -%! A_5 = [ 0.37, -0.05, -0.05, -0.07; -%! -0.05, 0.116, 0.0, -0.05 + 0.05i; -%! -0.05, 0.0, 0.116, -0.05; -%! -0.07, -0.05 - 0.05i, -0.05, 0.202]; -%! A_5 = sparse(A_5); -%! A_6 = [ 0.37, -0.05 - i, -0.05, -0.07; -%! -0.05 + i, 0.116, 0.0, -0.05; -%! -0.05, 0.0, 0.116, -0.05; -%! -0.07, -0.05, -0.05, 0.202]; -%! A_6 = sparse(A_6); -%! A_7 = A_5; -%! A_7(1) = 2i; -%! -%!test -%!error icholt ([]); -%!error icholt ([],[]); -%!error icholt ([],[],[]); -%!error [~] = icholt ([],[],[]); -%!error [L] = icholt ([],[],[]); -%!error [L] = icholt ([], 1e-4, 1); -%!error [L] = icholt (A_1, [], 'off'); -%!error [L] = icholt (A_1, 1e-4, []); -%!error [L, E] = icholt (A_1, 1e-4, 'off'); -%!error [L] = icholt (A_1, 1e-4, 'off', A_1); -%!error icholt (sparse (0), 1e-4, 'off'); -%!error icholt (sparse (-0), 1e-4, 'off'); -%!error icholt (sparse (-1), 1e-4, 'off'); -%!error icholt (sparse (i), 1e-4, 'off'); -%!error icholt (sparse (-i), 1e-4, 'off'); -%!error icholt (sparse (1 + 1i), 1e-4, 'off'); -%!error icholt (sparse (1 - 1i), 1e-4, 'off'); -%! -%!test -%! L = icholt (sparse (1), 1e-4, 'off'); -%! assert (L, sparse (1)); -%! L = icholt (sparse (4), 1e-4, 'off'); -%! assert (L, sparse (2)); -%! -%!test -%! L = icholt (A_1, 1e-4, 'off'); -%! assert (norm (A_1 - L*L', 'fro') / norm (A_1, 'fro'), eps, eps); -%! L = icholt (A_1, 1e-4, 'on'); -%! assert (norm (A_1 - L*L', 'fro') / norm (A_1, 'fro'), eps, eps); -%! -%!test -%! L = icholt (A_2, 1e-4, 'off'); -%! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 1e-4, 1e-4); -%! L = icholt (A_2, 1e-4, 'on'); -%! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 3e-4, 1e-4); -%! -%!test -%! L = icholt (A_3, 1e-4, 'off'); -%! assert (norm (A_3 - L*L', 'fro') / norm (A_3, 'fro'), eps, eps); -%! L = icholt (A_3, 1e-4, 'on'); -%! assert (norm (A_3 - L*L', 'fro') / norm (A_3, 'fro'), eps, eps); -%! -%!test -%! L = icholt (A_4, 1e-4, 'off'); -%! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 2e-4, 1e-4); -%! L = icholt (A_4, 1e-4, 'on'); -%! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 7e-4, 1e-4); -%! -%% Complex matrices -%!test -%! L = ichol0 (A_5, 'off'); -%! assert (norm (A_5 - L*L', 'fro') / norm (A_5, 'fro'), 1e-2, 1e-2); -%! L = ichol0 (A_5, 'on'); -%! assert (norm (A_5 - L*L', 'fro') / norm (A_5, 'fro'), 2e-2, 1e-2); -%% Negative pivot -%!error ichol0 (A_6, 'off'); -%% Complex entry in the diagonal -%!error ichol0 (A_7, 'off'); -*/ - - diff -r 168c0aa9bb05 -r df64071e538c libinterp/dldfcn/ilu0.cc --- a/libinterp/dldfcn/ilu0.cc Tue Aug 12 15:58:18 2014 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,311 +0,0 @@ -/** - * Copyright (C) 2014 Eduardo Ramos Fernández - * - * This file is part of Octave. - * - * Octave is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free Software - * Foundation; either version 3 of the License, or (at your option) any later - * version. - * - * Octave is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along with - * Octave; see the file COPYING. If not, see . - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "defun-dld.h" -#include "parse.h" - -/* - * That function implements the IKJ and JKI variants of gaussian elimination to - * perform the ILUTP decomposition. The behaviour is controlled by milu - * parameter. If milu = ['off'|'col'] the JKI version is performed taking - * advantage of CCS format of the input matrix. If milu = 'row' the input matrix - * has to be transposed to obtain the equivalent CRS structure so we can work - * efficiently with rows. In this case IKJ version is used. - */ - -template -void ilu_0 (octave_matrix_t& sm, const std::string milu = "off") { - - const octave_idx_type n = sm.cols (); - OCTAVE_LOCAL_BUFFER (octave_idx_type, iw, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, uptr, n); - octave_idx_type j1, j2, jrow, jw, i, k, jj; - T tl, r; - - char opt; - enum {OFF, ROW, COL}; - if (milu == "row") - { - opt = ROW; - sm = sm.transpose (); - } - else if (milu == "col") - opt = COL; - else - opt = OFF; - - octave_idx_type* cidx = sm.cidx (); - octave_idx_type* ridx = sm.ridx (); - T* data = sm.data (); - for (i = 0; i < n; i++) - iw[i] = -1; - for (k = 0; k < n; k++) - { - j1 = cidx[k]; - j2 = cidx[k+1] - 1; - octave_idx_type j; - for (j = j1; j <= j2; j++) - { - iw[ridx[j]] = j; - } - r = 0; - j = j1; - jrow = ridx[j]; - while ((jrow < k) && (j <= j2)) - { - if (opt == ROW) - { - tl = data[j] / data[uptr[jrow]]; - data[j] = tl; - } - for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++) - { - jw = iw[ridx[jj]]; - if (jw != -1) - if (opt == ROW) - data[jw] -= tl * data[jj]; - else - data[jw] -= data[j] * data[jj]; - - else - // That is for the milu='row' - if (opt == ROW) - r += tl * data[jj]; - else if (opt == COL) - r += data[j] * data[jj]; - } - j++; - jrow = ridx[j]; - } - uptr[k] = j; - if(opt != OFF) - data[uptr[k]] -= r; - if (opt != ROW) - for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++) - data[jj] /= data[uptr[k]]; - if (k != jrow) - { - error ("ilu0: Your input matrix has a zero in the diagonal."); - break; - } - - if (data[j] == T(0)) - { - error ("ilu0: There is a pivot equal to zero."); - break; - } - for(i = j1; i <= j2; i++) - iw[ridx[i]] = -1; - } - if (opt == ROW) - sm = sm.transpose (); -} - -DEFUN_DLD (ilu0, args, nargout, "-*- texinfo -*-\n\ -@deftypefn {Loadable Function} {[@var{L}, @var{U}] =} ilu0 (@var{A})\n\ -@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} ilu0 (@var{A}, @var{milu})\n\ -\n\ -NOTE: No pivoting is performed.\n\ -\n\ -Computes the incomplete LU-factorization (ILU) with 0-order level of fill of \ -@var{A}.\n\ -\n\ -@code{[@var{L}, @var{U}] = ilu0 (@var{A})} computes the zero fill-in ILU-\ -factorization ILU(0) of @var{A}, such that @code{@var{L} * @var{U}} is an \ -approximation of the square sparse matrix @var{A}. Parameter @var{milu} = \ -['off'|'row'|'col'] set if no row nor column sums are preserved, row sums \ -are preserved or column sums are preserved respectively.\n\ -\n\ -For a full description of ILU0 and its options see ilu documentation.\n\ -\n\ -For more information about the algorithms themselves see:\n\ -\n\ -[1] Saad, Yousef: Iterative Methods for Sparse Linear Systems. Second Edition. \ -Minneapolis, Minnesota: Siam 2003.\n\ -\n\ - @seealso{ilu, ilutp, iluc, ichol}\n\ - @end deftypefn") -{ - octave_value_list retval; - - int nargin = args.length (); - std::string milu; - - - if (nargout > 2 || nargin < 1 || nargin > 2) - { - print_usage (); - return retval; - } - - if (args (0).is_empty ()) - { - retval (0) = octave_value (SparseMatrix()); - retval (1) = octave_value (SparseMatrix()); - return retval; - } - - if (args (0).is_scalar_type () || !args (0).is_sparse_type ()) - error ("ilu0: 1. parameter must be a sparse square matrix."); - - if (nargin == 2) - { - milu = args (1).string_value (); - if (error_state || !(milu == "row" || milu == "col" || milu == "off")) - error ( - "ilu0: 2. parameter must be 'row', 'col' or 'off' character string."); - // maybe resolve milu to a numerical value / enum type already here! - } - - - if (!error_state) - { - // In ILU0 algorithm the zero-pattern of the input matrix is preserved so - // it's structure does not change during the algorithm. The same input - // matrix is used to build the output matrix due to that fact. - octave_value_list param_list; - if (!args (0).is_complex_type ()) - { - SparseMatrix sm = args (0).sparse_matrix_value (); - ilu_0 (sm, milu); - if (!error_state) - { - param_list.append (sm); - retval (1) = octave_value ( - feval ("triu", param_list)(0).sparse_matrix_value ()); - SparseMatrix eye = feval ("speye", - octave_value_list ( - octave_value (sm.cols ())))(0).sparse_matrix_value (); - param_list.append (-1); - retval (0) = octave_value ( - eye + feval ("tril", param_list)(0).sparse_matrix_value ()); - - } - } - else - { - SparseComplexMatrix sm = args (0).sparse_complex_matrix_value (); - ilu_0 (sm, milu); - if (!error_state) - { - param_list.append (sm); - retval (1) = octave_value ( - feval ("triu", param_list)(0).sparse_complex_matrix_value ()); - SparseComplexMatrix eye = feval ("speye", - octave_value_list ( - octave_value (sm.cols ())))(0).sparse_complex_matrix_value (); - param_list.append (-1); - retval (0) = octave_value (eye + - feval ("tril", param_list)(0).sparse_complex_matrix_value ()); - } - } - - } - - return retval; -} - -/* Test cases for real numbers. -%!shared n_tiny, n_small, n_medium, n_large, A_tiny, A_small, A_medium, A_large -%! n_tiny = 5; -%! n_small = 40; -%! n_medium = 600; -%! n_large = 10000; -%! A_tiny = spconvert ([1 4 2 3 3 4 2 5; 1 1 2 3 4 4 5 5; 1 2 3 4 5 6 7 8]'); -%! A_small = sprand (n_small, n_small, 1/n_small) + speye (n_small); -%! A_medium = sprand (n_medium, n_medium, 1/n_medium) + speye (n_medium); -%! A_large = sprand (n_large, n_large, 1/n_large/10) + speye (n_large); -%!# Input validation tests -%!test -%!error [L,U] = ilu0(A_tiny, 1); -%!error [L,U] = ilu0(A_tiny, [1, 2]); -%!error [L,U] = ilu0(A_tiny, ''); -%!error [L,U] = ilu0(A_tiny, 'foo'); -%! [L,U] = ilu0 ([]); -%! assert (isempty (L), logical (1)); -%! assert (isempty (U), logical (1)); -%!error [L,U] = ilu0 (0); -%!error [L,U] = ilu0 (sparse (0)); -%! [L,U] = ilu0 (sparse (2)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2)); -%!test -%! [L,U] = ilu0 (A_tiny); -%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), 0, n_tiny*eps); -%!test -%! [L,U] = ilu0 (A_small); -%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), 0, 1); -%!test -%! [L,U] = ilu0 (A_medium); -%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), 0, 1); -%!test -%! [L,U] = ilu0 (A_large); -%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), 0, 1); -*/ - -/* Test cases for complex numbers -%!shared n_tiny, n_small, n_medium, n_large, A_tiny, A_small, A_medium, A_large -%! n_tiny = 5; -%! n_small = 40; -%! n_medium = 600; -%! n_large = 10000; -%! A_tiny = spconvert([1 4 2 3 3 4 2 5; 1 1 2 3 4 4 5 5; 1 2 3 4 5 6 7 8]'); -%! A_tiny(1,1) += 1i; -%! A_small = sprand(n_small, n_small, 1/n_small) + ... -%! i * sprand(n_small, n_small, 1/n_small) + speye (n_small); -%! A_medium = sprand(n_medium, n_medium, 1/n_medium) + ... -%! i * sprand(n_medium, n_medium, 1/n_medium) + speye (n_medium); -%! A_large = sprand(n_large, n_large, 1/n_large/10) + ... -%! i * sprand(n_large, n_large, 1/n_large/10) + speye (n_large); -%!test -%! [L,U] = ilu0 ([]); -%! assert (isempty (L), logical (1)); -%! assert (isempty (U), logical (1)); -%!error [L,U] = ilu0 (0+0i); -%!error [L,U] = ilu0 (0i); -%!error [L,U] = ilu0 (sparse (0+0i)); -%!error [L,U] = ilu0 (sparse (0i)); -%!test -%! [L,U] = ilu0 (sparse (2+0i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2)); -%! [L,U] = ilu0 (sparse (2+2i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2+2i)); -%! [L,U] = ilu0 (sparse (2i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2i)); -%!test -%! [L,U] = ilu0 (A_tiny); -%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), 0, n_tiny*eps); -%!test -%! [L,U] = ilu0 (A_small); -%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), 0, 1); -%!test -%! [L,U] = ilu0 (A_medium); -%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), 0, 1); -%!test -%! [L,U] = ilu0 (A_large); -%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), 0, 1); -*/ - diff -r 168c0aa9bb05 -r df64071e538c libinterp/dldfcn/iluc.cc --- a/libinterp/dldfcn/iluc.cc Tue Aug 12 15:58:18 2014 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,518 +0,0 @@ -/** - * Copyright (C) 2014 Eduardo Ramos Fernández - * - * This file is part of Octave. - * - * Octave is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free Software - * Foundation; either version 3 of the License, or (at your option) any later - * version. - * - * Octave is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along with - * Octave; see the file COPYING. If not, see . - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "defun-dld.h" -#include "parse.h" - -template -void ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u, - octave_matrix_t& L, octave_matrix_t& U, T* cols_norm, - T* rows_norm, const T droptol = 0, - const std::string milu = "off") -{ - - // Map the strings into chars to faster comparation inside loops - #define ROW 1 - #define COL 2 - #define OFF 0 - char opt; - if (milu == "row") - opt = ROW; - else if (milu == "col") - opt = COL; - else - opt = OFF; - - octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u, - max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len; - - const octave_idx_type n = sm_u.cols (); - sm_u = sm_u.transpose (); - - max_len_u = sm_u.nnz (); - max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n; - max_len_l = sm_l.nnz (); - max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n; - // Extract pointers to the arrays for faster access inside loops - octave_idx_type* cidx_in_u = sm_u.cidx (); - octave_idx_type* ridx_in_u = sm_u.ridx (); - T* data_in_u = sm_u.data (); - octave_idx_type* cidx_in_l = sm_l.cidx (); - octave_idx_type* ridx_in_l = sm_l.ridx (); - T* data_in_l = sm_l.data (); - T tl, pivot; - - // L output arrays - Array ridx_out_l (dim_vector (max_len_l, 1)); - octave_idx_type* ridx_l = ridx_out_l.fortran_vec (); - Array data_out_l (dim_vector (max_len_l, 1)); - T* data_l = data_out_l.fortran_vec (); - - // U output arrays - Array ridx_out_u (dim_vector (max_len_u, 1)); - octave_idx_type* ridx_u = ridx_out_u.fortran_vec (); - Array data_out_u (dim_vector (max_len_u, 1)); - T* data_u = data_out_u.fortran_vec (); - - // Working arrays - OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1); - OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1); - OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n); - OCTAVE_LOCAL_BUFFER (T, w_data_l, n); - OCTAVE_LOCAL_BUFFER (T, w_data_u, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, Ufirst, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n); - OCTAVE_LOCAL_BUFFER (T, cr_sum, n); - - T zero = T (0); - - cidx_u[0] = cidx_in_u[0]; - cidx_l[0] = cidx_in_l[0]; - for (i = 0; i < n; i++) - { - w_data_u[i] = zero; - w_data_l[i] = zero; - cr_sum[i] = zero; - } - - total_len_u = 0; - total_len_l = 0; - cols_list_len = 0; - rows_list_len = 0; - - for (k = 0; k < n; k++) - { - // Load the working column and working row - for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++) - w_data_l[ridx_in_l[i]] = data_in_l[i]; - - for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++) - w_data_u[ridx_in_u[i]] = data_in_u[i]; - - // Update U working row - for (j = 0; j < rows_list_len; j++) - { - if ((Ufirst[rows_list[j]] != -1)) - for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++) - { - jrow = ridx_u[jj]; - w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]]; - } - } - // Update L working column - for (j = 0; j < cols_list_len; j++) - { - if (Lfirst[cols_list[j]] != -1) - for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++) - { - jrow = ridx_l[jj]; - w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]]; - } - } - - if ((max_len_u - total_len_u) < n) - { - max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n; - data_out_u.resize (dim_vector (max_len_u, 1)); - data_u = data_out_u.fortran_vec (); - ridx_out_u.resize (dim_vector (max_len_u, 1)); - ridx_u = ridx_out_u.fortran_vec (); - } - - if ((max_len_l - total_len_l) < n) - { - max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n; - data_out_l.resize (dim_vector (max_len_l, 1)); - data_l = data_out_l.fortran_vec (); - ridx_out_l.resize (dim_vector (max_len_l, 1)); - ridx_l = ridx_out_l.fortran_vec (); - } - - // Expand the working row into the U output data structures - w_len_l = 0; - data_u[total_len_u] = w_data_u[k]; - ridx_u[total_len_u] = k; - w_len_u = 1; - for (i = k + 1; i < n; i++) - { - if (w_data_u[i] != zero) - { - if (std::abs (w_data_u[i]) < (droptol * rows_norm[k])) - { - if (opt == ROW) - cr_sum[k] += w_data_u[i]; - else if (opt == COL) - cr_sum[i] += w_data_u[i]; - } - else - { - data_u[total_len_u + w_len_u] = w_data_u[i]; - ridx_u[total_len_u + w_len_u] = i; - w_len_u++; - } - } - - // Expand the working column into the L output data structures - if (w_data_l[i] != zero) - { - if (std::abs (w_data_l[i]) < (droptol * cols_norm[k])) - { - if (opt == COL) - cr_sum[k] += w_data_l[i]; - else if (opt == ROW) - cr_sum[i] += w_data_l[i]; - } - else - { - data_l[total_len_l + w_len_l] = w_data_l[i]; - ridx_l[total_len_l + w_len_l] = i; - w_len_l++; - } - } - w_data_u[i] = zero; - w_data_l[i] = zero; - } - - // Compensate row and column sums --> milu option - if (opt == COL || opt == ROW) - data_u[total_len_u] += cr_sum[k]; - - // Check if the pivot is zero - if (data_u[total_len_u] == zero) - { - error ("iluc: There is a pivot equal to zero."); - break; - } - - // Scale the elements in L by the pivot - for (i = total_len_l ; i < (total_len_l + w_len_l); i++) - data_l[i] /= data_u[total_len_u]; - - - total_len_u += w_len_u; - cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u; - total_len_l += w_len_l; - cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l; - - // The tricky part of the algorithm. The arrays pointing to the first - // working element of each column in the next iteration (Lfirst) or - // the first working element of each row (Ufirst) are updated. Also the - // arrays working as lists cols_list and rows_list are filled with indexes - // pointing to Ufirst and Lfirst respectively. - // TODO: Maybe the -1 indicating in Ufirst and Lfirst, that no elements - // have to be considered in a certain column or row in next iteration, can - // be removed. It feels safer to me using such an indicator. - if (k < (n - 1)) - { - if (w_len_u > 0) - Ufirst[k] = cidx_u[k]; - else - Ufirst[k] = -1; - if (w_len_l > 0) - Lfirst[k] = cidx_l[k]; - else - Lfirst[k] = -1; - cols_list_len = 0; - rows_list_len = 0; - for (i = 0; i <= k; i++) - { - if (Ufirst[i] != -1) - { - jj = ridx_u[Ufirst[i]]; - if (jj < (k + 1)) - { - if (Ufirst[i] < (cidx_u[i+1])) - { - Ufirst[i]++; - if (Ufirst[i] == cidx_u[i+1]) - Ufirst[i] = -1; - else - jj = ridx_u[Ufirst[i]]; - } - } - if (jj == (k + 1)) - { - cols_list[cols_list_len] = i; - cols_list_len++; - } - } - - if (Lfirst[i] != -1) - { - jj = ridx_l[Lfirst[i]]; - if (jj < (k + 1)) - if(Lfirst[i] < (cidx_l[i+1])) - { - Lfirst[i]++; - if (Lfirst[i] == cidx_l[i+1]) - Lfirst[i] = -1; - else - jj = ridx_l[Lfirst[i]]; - } - if (jj == (k + 1)) - { - rows_list[rows_list_len] = i; - rows_list_len++; - } - } - } - } - } - - if (!error_state) - { - // Build the output matrices - L = octave_matrix_t (n, n, total_len_l); - U = octave_matrix_t (n, n, total_len_u); - for (i = 0; i <= n; i++) - L.cidx (i) = cidx_l[i]; - for (i = 0; i < total_len_l; i++) - { - L.ridx (i) = ridx_l[i]; - L.data (i) = data_l[i]; - } - for (i = 0; i <= n; i++) - U.cidx (i) = cidx_u[i]; - for (i = 0; i < total_len_u; i++) - { - U.ridx (i) = ridx_u[i]; - U.data (i) = data_u[i]; - } - U = U.transpose (); - } -} - -DEFUN_DLD (iluc, args, nargout, "-*- texinfo -*-\n\ -@deftypefn {Loadable Function} {[@var{L}, @var{U}] =} iluc (@var{A})\n\ -@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} iluc (@var{A}, @var{droptol}, \ -@var{milu})\n\ -\n\ -Computes the crout version incomplete LU-factorization (ILU) with threshold of @var{A}.\n\ -\n\ -NOTE: No pivoting is performed.\n\ -\n\ -@code{[@var{L}, @var{U}] = iluc (@var{A})} computes the default crout version\n\ -ILU-factorization with threshold ILUT of @var{A}, such that \ -@code{@var{L} * @var{U}} is an approximation of the square sparse matrix \ -@var{A}. This version of ILU algorithms is significantly faster than ILUT or ILU(0). \ -Parameter @code{@var{droptol}>=0} is the scalar double threshold. All elements \ -@code{x<=@var{droptol}} will be dropped in the factorization. Parameter @var{milu} \ -= ['off'|'row'|'col'] set if no row nor column sums are preserved, row sums are \ -preserved or column sums are preserved respectively.\n\ -\n\ -For a full description of ILUC behaviour and its options see ilu documentation.\n\ -\n\ -For more information about the algorithms themselves see:\n\ -\n\ -[1] Saad, Yousef: Iterative Methods for Sparse Linear Systems. Second Edition. \ -Minneapolis, Minnesota: Siam 2003.\n\ -\n\ -@seealso{ilu, ilu0, ilutp, ichol}\n\ -@end deftypefn") -{ - - octave_value_list retval; - int nargin = args.length (); - std::string milu = "off"; - double droptol = 0; - double thresh = 0; - - if (nargout != 2 || nargin < 1 || nargin > 3) - { - print_usage (); - return retval; - } - - // To be matlab compatible - if (args (0).is_empty ()) - { - retval (0) = octave_value (SparseMatrix()); - retval (1) = octave_value (SparseMatrix()); - return retval; - } - - if (args (0).is_scalar_type () || !args (0).is_sparse_type ()) - error ("iluc: 1. parameter must be a sparse square matrix."); - - if (! error_state && (nargin >= 2)) - { - droptol = args (1).double_value (); - if (error_state || (droptol < 0) || ! args (1).is_real_scalar ()) - error ("iluc: 2. parameter must be a positive real scalar."); - } - - if (! error_state && (nargin == 3)) - { - milu = args (2).string_value (); - if (error_state || !(milu == "row" || milu == "col" || milu == "off")) - error ("iluc: 3. parameter must be 'row', 'col' or 'off' character string."); - } - - if (! error_state) - { - octave_value_list param_list; - if (!args (0).is_complex_type ()) - { - Array cols_norm, rows_norm; - param_list.append (args (0).sparse_matrix_value ()); - SparseMatrix sm_u = feval ("triu", param_list)(0).sparse_matrix_value (); - param_list.append (-1); - SparseMatrix sm_l = feval ("tril", param_list)(0).sparse_matrix_value (); - param_list (1) = "rows"; - rows_norm = feval ("norm", param_list)(0).vector_value (); - param_list (1) = "cols"; - cols_norm = feval ("norm", param_list)(0).vector_value (); - param_list.clear (); - SparseMatrix U; - SparseMatrix L; - ilu_crout (sm_l, sm_u, L, U, cols_norm.fortran_vec (), - rows_norm.fortran_vec (), droptol, milu); - if (! error_state) - { - param_list.append (octave_value (L.cols ())); - SparseMatrix eye = feval ("speye", param_list)(0).sparse_matrix_value (); - retval (0) = octave_value (L + eye); - retval (1) = octave_value (U); - } - } - else - { - Array cols_norm, rows_norm; - param_list.append (args (0).sparse_complex_matrix_value ()); - SparseComplexMatrix sm_u = feval("triu", - param_list)(0).sparse_complex_matrix_value (); - param_list.append (-1); - SparseComplexMatrix sm_l = feval("tril", - param_list)(0).sparse_complex_matrix_value (); - param_list (1) = "rows"; - rows_norm = feval ("norm", param_list)(0).complex_vector_value (); - param_list (1) = "cols"; - cols_norm = feval ("norm", param_list)(0).complex_vector_value (); - param_list.clear (); - SparseComplexMatrix U; - SparseComplexMatrix L; - ilu_crout < SparseComplexMatrix, Complex > - (sm_l, sm_u, L, U, cols_norm.fortran_vec () , - rows_norm.fortran_vec (), Complex (droptol), milu); - if (! error_state) - { - param_list.append (octave_value (L.cols ())); - SparseComplexMatrix eye = feval ("speye", - param_list)(0).sparse_complex_matrix_value (); - retval (0) = octave_value (L + eye); - retval (1) = octave_value (U); - } - } - - - } - - return retval; -} - - -/* Test cases for complex numbers -%!shared n_tiny, n_small, n_medium, n_large, A_tiny, A_small, A_medium, A_large -%! n_tiny = 5; -%! n_small = 40; -%! n_medium = 600; -%! n_large = 10000; -%! A_tiny = spconvert([1 4 2 3 3 4 2 5; 1 1 2 3 4 4 5 5; 1 2 3 4 5 6 7 8]'); -%! A_tiny(1,1) += 1i; -%! A_small = sprand(n_small, n_small, 1/n_small) + i * sprand(n_small, n_small, 1/n_small) + speye (n_small); -%! A_medium = sprand(n_medium, n_medium, 1/n_medium) + i * sprand(n_medium, n_medium, 1/n_medium) + speye (n_medium); -%! A_large = sprand(n_large, n_large, 1/n_large/10) + i * sprand(n_large, n_large, 1/n_large/10) + speye (n_large); -%!# Input validation tests -%!test -%!error [L,U] = iluc(A_tiny, -1); -%!error [L,U] = iluc(A_tiny, [1,2]); -%!error [L,U] = iluc(A_tiny, 2i); -%!error [L,U] = iluc(A_tiny, 1, 'foo'); -%!error [L,U] = iluc(A_tiny, 1, ''); -%!error [L,U] = iluc(A_tiny, 1, 1); -%!error [L,U] = iluc(A_tiny, 1, [1,2]); -%! [L,U] = iluc ([]); -%! assert (isempty (L), logical (1)); -%! assert (isempty (U), logical (1)); -%!error [L,U] = iluc (0+0i); -%!error [L,U] = iluc (0i); -%!error [L,U] = iluc (sparse (0+0i)); -%!error [L,U] = iluc (sparse (0i)); -%! [L,U] = iluc (sparse (2+0i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2)); -%! [L,U] = iluc (sparse (2+2i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2+2i)); -%! [L,U] = iluc (sparse (2i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2i)); -%!# Output tests -%!test -%! [L,U] = iluc (A_tiny); -%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), 0, n_tiny*eps); -%!test -%! [L,U] = iluc (A_small); -%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), 0, 1); -%!test -%! [L,U] = iluc (A_medium); -%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), 0, 1); -%!test -%! [L,U] = iluc (A_large); -%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), 0, 1); -*/ - -/* Test cases for real numbers. -%!shared n_tiny, n_small, n_medium, n_large, A_tiny, A_small, A_medium, A_large -%! n_tiny = 5; -%! n_small = 40; -%! n_medium = 600; -%! n_large = 10000; -%! A_tiny = spconvert ([1 4 2 3 3 4 2 5; 1 1 2 3 4 4 5 5; 1 2 3 4 5 6 7 8]'); -%! A_small = sprand (n_small, n_small, 1/n_small) + speye (n_small); -%! A_medium = sprand (n_medium, n_medium, 1/n_medium) + speye (n_medium); -%! A_large = sprand (n_large, n_large, 1/n_large/10) + speye (n_large); -%!test -%! [L,U] = iluc ([]); -%! assert (isempty (L), logical (1)); -%! assert (isempty (U), logical (1)); -%!error [L,U] = iluc (0); -%!error [L,U] = iluc (sparse (0)); -%!test -%! [L,U] = iluc (sparse (2)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2)); -%!test -%! [L,U] = iluc (A_tiny); -%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), 0, n_tiny*eps); -%!test -%! [L,U] = iluc (A_small); -%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), 0, 1); -%!test -%! [L,U] = iluc (A_medium); -%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), 0, 1); -%!test -%! [L,U] = iluc (A_large); -%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), 0, 1); -*/ diff -r 168c0aa9bb05 -r df64071e538c libinterp/dldfcn/ilutp.cc --- a/libinterp/dldfcn/ilutp.cc Tue Aug 12 15:58:18 2014 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,707 +0,0 @@ -/** - * Copyright (C) 2014 Eduardo Ramos Fernández - * - * This file is part of Octave. - * - * Octave is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free Software - * Foundation; either version 3 of the License, or (at your option) any later - * version. - * - * Octave is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along with - * Octave; see the file COPYING. If not, see . - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "defun-dld.h" -#include "parse.h" - - -// That function implements the IKJ and JKI variants of gaussian elimination -// to perform the ILUTP decomposition. The behaviour is controlled by milu -// parameter. If milu = ['off'|'col'] the JKI version is performed taking -// advantage of CCS format of the input matrix. Row pivoting is performed. -// If milu = 'row' the input matrix has to be transposed to obtain the -// equivalent CRS structure so we can work efficiently with rows. In that -// case IKJ version is used and column pivoting is performed. - -template -void ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U, - octave_idx_type nnz_u, octave_idx_type nnz_l, T* cols_norm, - Array & perm_vec, const T droptol = T(0), - const T thresh = T(0), const std::string milu = "off", - const double udiag = 0) - { - - // Map the strings into chars to faster comparation inside loops - enum {OFF, ROW, COL}; - char opt; - if (milu == "row") - opt = ROW; - else if (milu == "col") - opt = COL; - else - opt = OFF; - - const octave_idx_type n = sm.cols (); - - // That is necessary for the JKI (milu = "row") variant. - if (opt == ROW) - sm = sm.transpose(); - - // Extract pointers to the arrays for faster access inside loops - octave_idx_type* cidx_in = sm.cidx (); - octave_idx_type* ridx_in = sm.ridx (); - T* data_in = sm.data (); - octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm, res, - max_ind, max_len_l, max_len_u; - T tl, aux, maximum; - - max_len_u = nnz_u; - max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n; - max_len_l = nnz_l; - max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n; - - Array cidx_out_l (dim_vector (n + 1, 1)); - octave_idx_type* cidx_l = cidx_out_l.fortran_vec (); - Array ridx_out_l (dim_vector (max_len_l, 1)); - octave_idx_type* ridx_l = ridx_out_l.fortran_vec (); - Array data_out_l (dim_vector (max_len_l ,1)); - T* data_l = data_out_l.fortran_vec (); - // Data for U - Array cidx_out_u (dim_vector (n + 1, 1)); - octave_idx_type* cidx_u = cidx_out_u.fortran_vec (); - Array ridx_out_u (dim_vector (max_len_u, 1)); - octave_idx_type* ridx_u = ridx_out_u.fortran_vec (); - Array data_out_u (dim_vector (max_len_u, 1)); - T* data_u = data_out_u.fortran_vec(); - - // Working arrays and permutation arrays - octave_idx_type w_len_u, w_len_l; - T total_sum, partial_col_sum, partial_row_sum; - std::set iw_l; - std::set iw_u; - std::set ::iterator it, it2; - OCTAVE_LOCAL_BUFFER (T, w_data, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, iperm, n); - octave_idx_type* perm = perm_vec.fortran_vec (); - OCTAVE_LOCAL_BUFFER (octave_idx_type, uptr, n); - - - T zero = T(0); - cidx_l[0] = cidx_in[0]; - cidx_u[0] = cidx_in[0]; - /** - for (i = 0; i < ; i++) - { - ridx_u[i] = 0; - data_u[i] = 0; - ridx_l[i] = 0; - data_l[i] = 0; - } -**/ - for (i = 0; i < n; i++) - { - w_data[i] = 0; - perm[i] = i; - iperm[i] = i; - } - total_len_u = 0; - total_len_l = 0; - - for (k = 0; k < n; k++) - { - - for (j = cidx_in[k]; j < cidx_in[k+1]; j++) - { - p_perm = iperm[ridx_in[j]]; - w_data[iperm[ridx_in[j]]] = data_in[j]; - if (p_perm > k) - iw_l.insert (ridx_in[j]); - else - iw_u.insert (p_perm); - } - - it = iw_u.begin (); - jrow = *it; - total_sum = zero; - while ((jrow < k) && (it != iw_u.end ())) - { - if (opt == COL) - partial_col_sum = w_data[jrow]; - if (w_data[jrow] != zero) - { - if (opt == ROW) - { - partial_row_sum = w_data[jrow]; - tl = w_data[jrow] / data_u[uptr[jrow]]; - } - for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++) - { - p_perm = iperm[ridx_l[jj]]; - aux = w_data[p_perm]; - if (opt == ROW) - { - w_data[p_perm] -= tl * data_l[jj]; - partial_row_sum += tl * data_l[jj]; - } - else - { - tl = data_l[jj] * w_data[jrow]; - w_data[p_perm] -= tl; - if (opt == COL) - partial_col_sum += tl; - } - - if ((aux == zero) && (w_data[p_perm] != zero)) - { - if (p_perm > k) - iw_l.insert (ridx_l[jj]); - else - iw_u.insert (p_perm); - } - } - - // Drop element from the U part in IKJ and L part in JKI - // variant (milu = [col|off]) - if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k])) - && (w_data[jrow] != zero)) - { - if (opt == COL) - total_sum += partial_col_sum; - else if (opt == ROW) - total_sum += partial_row_sum; - w_data[jrow] = zero; - it2 = it; - it++; - iw_u.erase (it2); - jrow = *it; - continue; - } - else - // This is the element scaled by the pivot in the actual iteration - if (opt == ROW) - w_data[jrow] = tl; - } - jrow = *(++it); - } - - // Search for the pivot and update iw_l and iw_u if the pivot is not the - // diagonal element - if ((thresh > zero) && (k < (n-1))) - { - maximum = std::abs (w_data[k]) / thresh; - max_ind = perm[k]; - for (it = iw_l.begin (); it != iw_l.end (); ++it) - { - p_perm = iperm[*it]; - if (std::abs (w_data[p_perm]) > maximum) - { - maximum = std::abs (w_data[p_perm]); - max_ind = *it; - it2 = it; - } - } - // If the pivot is not the diagonal element update all. - p_perm = iperm[max_ind]; - if (max_ind != perm[k]) - { - iw_l.erase (it2); - if (w_data[k] != zero) - iw_l.insert (perm[k]); - else - iw_u.insert (k); - // Swap data and update permutation vectors - aux = w_data[k]; - iperm[perm[p_perm]] = k; - iperm[perm[k]] = p_perm; - c = perm[k]; - perm[k] = perm[p_perm]; - perm[p_perm] = c; - w_data[k] = w_data[p_perm]; - w_data[p_perm] = aux; - } - - } - - // Drop elements in the L part in the IKJ and from the U part in the JKI - // version. - it = iw_l.begin (); - while (it != iw_l.end ()) - { - p_perm = iperm[*it]; - if (droptol > zero) - if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k])) - { - if (opt != OFF) - total_sum += w_data[p_perm]; - w_data[p_perm] = zero; - it2 = it; - it++; - iw_l.erase (it2); - continue; - } - - it++; - } - - // If milu =[row|col] sumation is preserved --> Compensate diagonal element. - if (opt != OFF) - { - if ((total_sum > zero) && (w_data[k] == zero)) - iw_u.insert (k); - w_data[k] += total_sum; - } - - - - // Check if the pivot is zero and if udiag is active. - // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row] - // will not preserve the row sum for that column/row. - if (w_data[k] == zero) - { - if (udiag == 1) - { - w_data[k] = droptol; - iw_u.insert (k); - } - else - { - error ("ilutp: There is a pivot equal to zero."); - break; - } - } - - // Scale the elements on the L part for IKJ version (milu = [col|off]) - if (opt != ROW) - for (it = iw_l.begin (); it != iw_l.end (); ++it) - { - p_perm = iperm[*it]; - w_data[p_perm] = w_data[p_perm] / w_data[k]; - } - - - if ((max_len_u - total_len_u) < n) - { - max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n; - data_out_u.resize (dim_vector (max_len_u, 1)); - data_u = data_out_u.fortran_vec (); - ridx_out_u.resize (dim_vector (max_len_u, 1)); - ridx_u = ridx_out_u.fortran_vec (); - } - - if ((max_len_l - total_len_l) < n) - { - max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n; - data_out_l.resize (dim_vector (max_len_l, 1)); - data_l = data_out_l.fortran_vec (); - ridx_out_l.resize (dim_vector (max_len_l, 1)); - ridx_l = ridx_out_l.fortran_vec (); - } - - // Expand working vector into U. - w_len_u = 0; - for (it = iw_u.begin (); it != iw_u.end (); ++it) - { - if (w_data[*it] != zero) - { - data_u[total_len_u + w_len_u] = w_data[*it]; - ridx_u[total_len_u + w_len_u] = *it; - w_len_u++; - } - w_data[*it] = 0; - } - total_len_u += w_len_u; - if (opt == ROW) - uptr[k] = total_len_u -1; - cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u; - - // Expand working vector into L. - w_len_l = 0; - for (it = iw_l.begin (); it != iw_l.end (); ++it) - { - p_perm = iperm[*it]; - if (w_data[p_perm] != zero) - { - data_l[total_len_l + w_len_l] = w_data[p_perm]; - ridx_l[total_len_l + w_len_l] = *it; - w_len_l++; - } - w_data[*it] = 0; - } - total_len_l += w_len_l; - cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l; - - iw_l.clear (); - iw_u.clear (); - } - - if (!error_state) - { - octave_matrix_t *L_ptr; - octave_matrix_t *U_ptr; - octave_matrix_t diag (n, n, n); - - // L and U are interchanged if milu = 'row'. It is a matter - // of nomenclature to re-use code with both IKJ and JKI - // versions of the algorithm. - if (opt == ROW) - { - L_ptr = &U; - U_ptr = &L; - L = octave_matrix_t (n, n, total_len_u - n); - U = octave_matrix_t (n, n, total_len_l); - } - else - { - L_ptr = &L; - U_ptr = &U; - L = octave_matrix_t (n, n, total_len_l); - U = octave_matrix_t (n, n, total_len_u); - } - - for (i = 0; i <= n; i++) - { - L_ptr->cidx (i) = cidx_l[i]; - U_ptr->cidx (i) = cidx_u[i]; - if (opt == ROW) - U_ptr->cidx (i) -= i; - } - - for (octave_idx_type i = 0; i < n; i++) - { - if (opt == ROW) - diag.elem (i,i) = data_u[uptr[i]]; - j = cidx_l[i]; - - while (j < cidx_l[i+1]) - { - L_ptr->ridx (j) = ridx_l[j]; - L_ptr->data (j) = data_l[j]; - j++; - } - j = cidx_u[i]; - - while (j < cidx_u[i+1]) - { - c = j; - if (opt == ROW) - { - // The diagonal is removed from from L if milu = 'row' - // That is because is convenient to have it inside - // the L part to carry out the process. - if (ridx_u[j] == i) - { - j++; - continue; - } - else - c -= i; - } - U_ptr->data (c) = data_u[j]; - U_ptr->ridx (c) = ridx_u[j]; - j++; - } - } - - if (opt == ROW) - { - U = U.transpose (); - // The diagonal, conveniently permuted is added to U - U += diag.index (idx_vector::colon, perm_vec); - L = L.transpose (); - } - } -} - -DEFUN_DLD (ilutp, args, nargout, "-*- texinfo -*-\n\ -@deftypefn {Loadable Function} {[@var{L}, @var{U}] =} ilutp (@var{A})\n\ -@deftypefnx {Loadable Function} {[@var{L}, @var{U}] =} ilutp (@var{A}, \ -@var{droptol}, @var{thresh}, @var{milu}, @var{udiag})\n\ -@deftypefnx {Loadable Function} {[@var{L}, @var{U}, @var{P}] =} ilutp (@var{A})\n\ -@deftypefnx {Loadable Function} {[@var{L}, @var{U}, @var{P}] =} ilutp \ -(@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})\n\ -\n\ -Computes the incomplete LU-factorization (ILU) with threshold and pivoting.\n\ -@code{[@var{L}, @var{U}] = ilutp (@var{A})} computes the default version of\n\ -ILU-factorization with threshold ILUT of @var{A}, such that \ -@code{@var{L} * @var{U}} is an approximation of the square sparse matrix \ -@var{A}. Pivoting is performed. Parameter @var{droptol} controls the fill-in of \ -output matrices. Default @var{droptol} = 0. Parameter @var{milu} = ['off'|'row'|'col'] \ -set if no row nor column sums are preserved, row sums are preserved or column sums are \ -preserved respectively. There are also additional diferences in the output matrices \ -depending on @var{milu} parameter. Default milu = 'off'. @var{thresh} controls the \ -selection of the pivot. Default @var{thresh} = 0. Parameter @var{udiag} indicates if \ -there will be replacement of 0s in the upper triangular factor with the value of \ -@var{droptol}. Default @var{udiag} = 0.\n\ -\n\ -For a full description of ILUTP behaviour and its options see ilu documentation.\n\ -\n\ -For more information about the algorithms themselves see:\n\n\ -[1] Saad, Yousef: Iterative Methods for Sparse Linear Systems. Second Edition. \ -Minneapolis, Minnesota: Siam 2003.\n\ -\n\ -@seealso{ilu, ilu0, iluc, ichol}\n\ -@end deftypefn") -{ - octave_value_list retval; - - int nargin = args.length (); - std::string milu = ""; - double droptol, thresh; - double udiag; - - - if (nargout < 2 || nargout > 3 || nargin < 1 || nargin > 5) - { - print_usage (); - return retval; - } - - // To be matlab compatible - if (args (0).is_empty ()) - { - retval (0) = octave_value (SparseMatrix ()); - retval (1) = octave_value (SparseMatrix ()); - if (nargout == 3) - retval (2) = octave_value (SparseMatrix ()); - return retval; - } - - if (args (0).is_scalar_type () || !args (0).is_sparse_type () ) - error ("ilutp: 1. parameter must be a sparse square matrix."); - - if (! error_state && (nargin >= 2)) - { - droptol = args (1).double_value (); - if (error_state || (droptol < 0) || ! args (1).is_real_scalar ()) - error ("ilutp: 2. parameter must be a positive scalar."); - } - - if (! error_state && (nargin >= 3)) - { - thresh = args (2).double_value (); - if (error_state || !args (2).is_real_scalar () || (thresh < 0) || thresh > 1) - error ("ilutp: 3. parameter must be a scalar 0 <= thresh <= 1."); - } - - if (! error_state && (nargin >= 4)) - { - milu = args (3).string_value (); - if (error_state || !(milu == "row" || milu == "col" || milu == "off")) - error ("ilutp: 3. parameter must be 'row', 'col' or 'off' character string."); - } - - if (! error_state && (nargin == 5)) - { - udiag = args (4).double_value (); - if (error_state || ! args (4).is_real_scalar () || ((udiag != 0) - && (udiag != 1))) - error ("ilutp: 5. parameter must be a scalar with value 1 or 0."); - } - - if (! error_state) - { - octave_value_list param_list; - octave_idx_type nnz_u, nnz_l; - if (!args (0).is_complex_type ()) - { - Array rc_norm; - SparseMatrix sm = args (0).sparse_matrix_value (); - param_list.append (sm); - nnz_u = (feval ("triu", param_list)(0).sparse_matrix_value ()).nnz (); - param_list.append (-1); - nnz_l = (feval ("tril", param_list)(0).sparse_matrix_value ()).nnz (); - if (milu == "row") - param_list (1) = "rows"; - else - param_list (1) = "cols"; - rc_norm = feval ("norm", param_list)(0).vector_value (); - param_list.clear (); - Array perm (dim_vector (sm.cols (), 1)); - SparseMatrix U; - SparseMatrix L; - ilu_tp (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), - perm, droptol, thresh, milu, udiag); - if (! error_state) - { - param_list.append (octave_value (L.cols ())); - SparseMatrix eye = feval ("speye", param_list)(0).sparse_matrix_value (); - if (milu == "row") - { - retval (0) = octave_value (L + eye); - if (nargout == 2) - retval (1) = octave_value (U); - else if (nargout == 3) - { - retval (1) = octave_value (U.index (idx_vector::colon, perm)); - retval (2) = octave_value (eye.index (idx_vector::colon, perm)); - } - } - else - { - retval (1) = octave_value (U); - if (nargout == 2) - retval (0) = octave_value (L + eye.index (perm, idx_vector::colon)); - else if (nargout == 3) - { - retval (0) = octave_value (L.index (perm, idx_vector::colon) + eye); - retval (2) = octave_value (eye.index (perm, idx_vector::colon)); - } - } - } - } - else - { - Array rc_norm; - SparseComplexMatrix sm = args (0).sparse_complex_matrix_value (); - param_list.append (sm); - nnz_u = feval ("triu", param_list)(0).sparse_complex_matrix_value ().nnz (); - param_list.append (-1); - nnz_l = feval ("tril", param_list)(0).sparse_complex_matrix_value ().nnz (); - if (milu == "row") - param_list (1) = "rows"; - else - param_list (1) = "cols"; - rc_norm = feval ("norm", param_list)(0).complex_vector_value (); - Array perm (dim_vector (sm.cols (), 1)); - param_list.clear (); - SparseComplexMatrix U; - SparseComplexMatrix L; - ilu_tp < SparseComplexMatrix, Complex> - (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm, - Complex (droptol), Complex (thresh), milu, udiag); - - if (! error_state) - { - param_list.append (octave_value (L.cols ())); - SparseComplexMatrix eye = feval ("speye", - param_list)(0).sparse_complex_matrix_value (); - if (milu == "row") - { - retval (0) = octave_value (L + eye); - if (nargout == 2) - retval (1) = octave_value (U); - else if (nargout == 3) - { - retval (1) = octave_value (U.index (idx_vector::colon, perm)); - retval (2) = octave_value (eye.index (idx_vector::colon, perm)); - } - } - else - { - retval (1) = octave_value (U); - if (nargout == 2) - retval (0) = octave_value (L + eye.index (perm, idx_vector::colon)) ; - else if (nargout == 3) - { - retval (0) = octave_value (L.index (perm, idx_vector::colon) + eye); - retval (2) = octave_value (eye.index (perm, idx_vector::colon)); - } - } - } - } - - } - - return retval; -} - -/* Test cases -%!shared n_tiny, n_small, n_medium, n_large, A_tiny, A_small, A_medium, A_large -%! n_tiny = 5; -%! n_small = 40; -%! n_medium = 600; -%! n_large = 10000; -%! A_tiny = spconvert([1 4 2 3 3 4 2 5; 1 1 2 3 4 4 5 5; 1 2 3 4 5 6 7 8]'); -%! A_tiny(1,1) += 1i; -%! A_small = sprand(n_small, n_small, 1/n_small) + i * sprand(n_small, n_small, 1/n_small) + speye (n_small); -%! A_medium = sprand(n_medium, n_medium, 1/n_medium) + i * sprand(n_medium, n_medium, 1/n_medium) + speye (n_medium); -%! A_large = sprand(n_large, n_large, 1/n_large/10) + i * sprand(n_large, n_large, 1/n_large/10) + speye (n_large); -%!# Input validation tests -%!test -%!error [L,U] = ilutp(A_tiny, -1); -%!error [L,U] = ilutp(A_tiny, [1,2]); -%!error [L,U] = ilutp(A_tiny, 2i); -%!error [L,U] = ilutp(A_tiny, 1, -1); -%!error [L,U] = ilutp(A_tiny, 1, 2); -%!error [L,U] = ilutp(A_tiny, 1, [1, 0]); -%!error [L,U] = ilutp(A_tiny, 1, 1, 'foo'); -%!error [L,U] = ilutp(A_tiny, 1, 1, ''); -%!error [L,U] = ilutp(A_tiny, 1, 1, 1); -%!error [L,U] = ilutp(A_tiny, 1, 1, [1,2]); -%!error [L,U] = ilutp(A_tiny, 1, 1, 'off', 0.5); -%!error [L,U] = ilutp(A_tiny, 1, 1, 'off', -1); -%!error [L,U] = ilutp(A_tiny, 1, 1, 'off', 2); -%!error [L,U] = ilutp(A_tiny, 1, 1, 'off', [1 ,0]); -%! [L,U] = iluc ([]); -%! assert (isempty (L), logical (1)); -%! assert (isempty (U), logical (1)); -%!error [L,U] = iluc (0+0i); -%!error [L,U] = iluc (0i); -%!error [L,U] = iluc (sparse (0+0i)); -%!error [L,U] = iluc (sparse (0i)); -%! [L,U] = iluc (sparse (2+0i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2)); -%! [L,U] = iluc (sparse (2+2i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2+2i)); -%! [L,U] = iluc (sparse (2i)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2i)); -%!test -%! [L,U] = iluc (A_tiny); -%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), 0, n_tiny*eps); -%!test -%! [L,U] = iluc (A_small); -%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), 0, 1); -%!test -%! [L,U] = iluc (A_medium); -%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), 0, 1); -%!test -%! [L,U] = iluc (A_large); -%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), 0, 1); -*/ - -/* Test cases for real numbers. -%!shared n_tiny, n_small, n_medium, n_large, A_tiny, A_small, A_medium, A_large -%! n_tiny = 5; -%! n_small = 40; -%! n_medium = 600; -%! n_large = 10000; -%! A_tiny = spconvert ([1 4 2 3 3 4 2 5; 1 1 2 3 4 4 5 5; 1 2 3 4 5 6 7 8]'); -%! A_small = sprand (n_small, n_small, 1/n_small) + speye (n_small); -%! A_medium = sprand (n_medium, n_medium, 1/n_medium) + speye (n_medium); -%! A_large = sprand (n_large, n_large, 1/n_large/10) + speye (n_large); -%!test -%! [L,U] = iluc ([]); -%! assert (isempty (L), logical (1)); -%! assert (isempty (U), logical (1)); -%!error [L,U] = iluc (0); -%!error [L,U] = iluc (sparse (0)); -%!test -%! [L,U] = iluc (sparse (2)); -%! assert (L, sparse (1)); -%! assert (U, sparse (2)); -%!test -%! [L,U] = iluc (A_tiny); -%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), 0, n_tiny*eps); -%!test -%! [L,U] = iluc (A_small); -%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), 0, 1); -%!test -%! [L,U] = iluc (A_medium); -%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), 0, 1); -%!test -%! [L,U] = iluc (A_large); -%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), 0, 1); -*/ diff -r 168c0aa9bb05 -r df64071e538c libinterp/dldfcn/module-files --- a/libinterp/dldfcn/module-files Tue Aug 12 15:58:18 2014 +0100 +++ b/libinterp/dldfcn/module-files Mon Aug 18 12:32:16 2014 +0100 @@ -4,6 +4,8 @@ __eigs__.cc|$(ARPACK_CPPFLAGS) $(SPARSE_XCPPFLAGS)|$(ARPACK_LDFLAGS) $(SPARSE_XLDFLAGS)|$(ARPACK_LIBS) $(SPARSE_XLIBS) $(LAPACK_LIBS) $(BLAS_LIBS) __fltk_uigetfile__.cc|$(GRAPHICS_CFLAGS) $(FT2_CPPFLAGS)|$(GRAPHICS_LDFLAGS) $(FT2_LDFLAGS)|$(GRAPHICS_LIBS) $(FT2_LIBS) __glpk__.cc|$(GLPK_CPPFLAGS)|$(GLPK_LDFLAGS)|$(GLPK_LIBS) +__ichol__.cc +__ilu__.cc __init_fltk__.cc|$(GRAPHICS_CFLAGS) $(FT2_CPPFLAGS) $(FONTCONFIG_CPPFLAGS)|$(GRAPHICS_LDFLAGS) $(FT2_LDFLAGS)|$(GRAPHICS_LIBS) $(FT2_LIBS) $(OPENGL_LIBS) __init_gnuplot__.cc|$(FT2_CPPFLAGS) $(FONTCONFIG_CPPFLAGS)|| __magick_read__.cc|$(MAGICK_CPPFLAGS)|$(MAGICK_LDFLAGS)|$(MAGICK_LIBS) @@ -15,11 +17,6 @@ convhulln.cc|$(QHULL_CPPFLAGS)|$(QHULL_LDFLAGS)|$(QHULL_LIBS) dmperm.cc|$(SPARSE_XCPPFLAGS)|$(SPARSE_XLDFLAGS)|$(SPARSE_XLIBS) fftw.cc|$(FFTW_XCPPFLAGS)|$(FFTW_XLDFLAGS)|$(FFTW_XLIBS) -ichol0.cc -icholt.cc -ilu0.cc -iluc.cc -ilutp.cc qr.cc|$(QRUPDATE_CPPFLAGS) $(SPARSE_XCPPFLAGS)|$(QRUPDATE_LDFLAGS) $(SPARSE_XLDFLAGS)|$(QRUPDATE_LIBS) $(SPARSE_XLIBS) symbfact.cc|$(SPARSE_XCPPFLAGS)|$(SPARSE_XLDFLAGS)|$(SPARSE_XLIBS) symrcm.cc|$(SPARSE_XCPPFLAGS)|$(SPARSE_XLDFLAGS)|$(SPARSE_XLIBS) diff -r 168c0aa9bb05 -r df64071e538c scripts/sparse/ichol.m --- a/scripts/sparse/ichol.m Tue Aug 12 15:58:18 2014 +0100 +++ b/scripts/sparse/ichol.m Mon Aug 18 12:32:16 2014 +0100 @@ -103,13 +103,13 @@ ## L = chol(A, "lower"); ## nnz (L) ## ans = 10 -## norm (A - L * L', 'fro') / norm (A, 'fro') +## norm (A - L * L', "fro") / norm (A, "fro") ## ans = 1.1993e-16 ## opts.type = 'nofill'; ## L = ichol(A,opts); ## nnz (L) ## ans = 9 -## norm (A - L * L', 'fro') / norm (A, 'fro') +## norm (A - L * L', "fro") / norm (A, "fro") ## ans = 0.019736 ## @end example ## @@ -128,7 +128,7 @@ ## L = ichol (A, opts); ## nnz (tril (A)) ## ans = 239400 -## norm (A - L * L', 'fro') / norm (A, 'fro') +## norm (A - L * L', "fro") / norm (A, "fro") ## ans = 0.062327 ## @end example ## @@ -148,10 +148,17 @@ endif % Check input matrix - if (isempty (A) || ~issparse(A) || ~issquare (A)) + if (~issparse(A) || ~issquare (A)) error ("ichol: Input A must be a non-empty sparse square matrix"); endif + % If A is empty and sparse then return empty L + % Compatibility with Matlab + if (isempty (A)) + L = A; + return; + endif + % Check input structure, otherwise set default values if (nargin == 2) if (~isstruct (opts)) @@ -220,10 +227,8 @@ A += opts.diagcomp * diag (diag (A)); endif if (strcmp (opts.shape, "upper") == 1) - disp("entro"); A_in = triu (A); A_in = A_in'; - else A_in = tril (A); endif @@ -231,9 +236,9 @@ % Delegate to specialized ICHOL switch (opts.type) case "nofill" - L = ichol0 (A_in, opts.michol); + L = __ichol0__ (A_in, opts.michol); case "ict" - L = icholt (A_in, opts.droptol, opts.michol); + L = __icholt__ (A_in, opts.droptol, opts.michol); otherwise printf ("The input structure is invalid.\n"); endswitch @@ -245,18 +250,35 @@ endfunction -%!shared A1, A2 +%!shared A1, A2, A3, A4, A5, A6, A7 %! A1 = [ 0.37, -0.05, -0.05, -0.07; %! -0.05, 0.116, 0.0, -0.05; %! -0.05, 0.0, 0.116, -0.05; %! -0.07, -0.05, -0.05, 0.202]; %! A1 = sparse(A1); +%! A2 = gallery ('poisson', 30); +%! A3 = gallery ('tridiag', 50); %! nx = 400; ny = 200; %! hx = 1 / (nx + 1); hy = 1 / (ny + 1); %! Dxx = spdiags ([ones(nx, 1), -2 * ones(nx, 1), ones(nx, 1)], [-1 0 1 ], nx, nx) / (hx ^ 2); %! Dyy = spdiags ([ones(ny, 1), -2 * ones(ny, 1), ones(ny, 1)], [-1 0 1 ], ny, ny) / (hy ^ 2); -%! A2 = -kron (Dxx, speye (ny)) - kron (speye (nx), Dyy); +%! A4 = -kron (Dxx, speye (ny)) - kron (speye (nx), Dyy); +%! A5 = [ 0.37, -0.05, -0.05, -0.07; +%! -0.05, 0.116, 0.0, -0.05 + 0.05i; +%! -0.05, 0.0, 0.116, -0.05; +%! -0.07, -0.05 - 0.05i, -0.05, 0.202]; +%! A5 = sparse(A5); +%! A6 = [ 0.37, -0.05 - i, -0.05, -0.07; +%! -0.05 + i, 0.116, 0.0, -0.05; +%! -0.05, 0.0, 0.116, -0.05; +%! -0.07, -0.05, -0.05, 0.202]; +%! A6 = sparse(A6); +%! A7 = A5; +%! A7(1) = 2i; %! + +%!# Input validation tests + %!test %!error ichol ([]); %!error ichol (0); @@ -271,58 +293,191 @@ %!error ichol (sparse (-0)); %!error ichol (sparse (-1)); %!error ichol (sparse (-1)); -%! +%!test +%! opts.milu = 'foo'; +%!error L = ichol (A1, opts); +%! opts.milu = 1; +%!error L = ichol (A1, opts); +%! opts.milu = []; +%!error L = ichol (A1, opts); +%!test +%! opts.droptol = -1; +%!error L = ichol (A1, opts); +%! opts.droptol = 0.5i; +%!error L = ichol (A1, opts); +%! opts.droptol = []; +%!error L = ichol (A1, opts); +%!test +%! opts.type = 'foo'; +%!error L = ichol (A1, opts); +%! opts.type = 1; +%!error L = ichol (A1, opts); +%! opts.type = []; +%!error L = ichol (A1, opts); +%!test +%! opts.shape = 'foo'; +%!error L = ichol (A1, opts); +%! opts.shape = 1; +%!error L = ichol (A1, opts); +%! opts.shape = []; +%!error L = ichol (A1, opts); +%!test +%! opts.diagcomp = -1; +%!error L = ichol (A1, opts); +%! opts.diagcomp = 0.5i; +%!error L = ichol (A1, opts); +%! opts.diagcomp = []; +%!error L = ichol (A1, opts); + +%!# ICHOL0 tests + %!test %! opts.type = "nofill"; %! opts.michol = "off"; %! assert (nnz (tril (A1)), nnz (ichol (A1, opts))); %! assert (nnz (tril (A2)), nnz (ichol (A2, opts))); +%! assert (nnz (tril (A3)), nnz (ichol (A3, opts))); +%! assert (nnz (tril (A4)), nnz (ichol (A4, opts))); +%! assert (nnz (tril (A5)), nnz (ichol (A5, opts))); %! %!test %! opts.type = "nofill"; %! opts.michol = "off"; %! L = ichol (A1, opts); -%! assert (norm (A1 - L * L', 'fro') / norm (A1, 'fro'), 0.01, 0.01); -%! L = ichol (A2, opts); -%! assert (norm (A2 - L * L', 'fro') / norm (A2, 'fro'), 0.06, 0.01); +%! assert (norm (A1 - L * L', "fro") / norm (A1, "fro"), 0.0197, 1e-4); +%! opts.shape = "upper"; +%! U = ichol (A1, opts); +%! assert (norm (A1 - U' * U, "fro") / norm (A1, "fro"), 0.0197, 1e-4); +%! opts.shape = "lower"; +%! L = ichol (A1, opts); +%! assert (norm (A1 - L * L', "fro") / norm (A1, "fro"), 0.0197, 1e-4); +%! +%!test +%! opts.michol = "on"; +%! opts.shape = "lower"; +%! opts.type = "nofill"; +%! L = ichol (A1, opts); +%! assert (norm (A1 - L * L', "fro") / norm (A1, "fro"), 0.0279, 1e-4); +%! opts.shape = "upper"; +%! U = ichol (A1, opts); +%! assert (norm (A1 - U' * U, "fro") / norm (A1, "fro"), 0.0279, 1e-4); +%! opts.shape = "lower"; +%! opts.diagcomp = 3e-3; +%! L = ichol (A1, opts); +%! assert (norm (A1 - L * L', "fro") / norm (A1, "fro"), 0.0272, 1e-4); %! -%%!test -%%! opts.type = "nofill"; -%%! opts.michol = "off"; -%%! opts.shape = "upper"; -%%! U = ichol (A1, opts); -%%! assert (norm (A1 - U' * U, 'fro') / norm (A1, 'fro'), 0.01, 0.01); +%!test +%! opts.type = "nofill"; +%! opts.michol = "off"; +%! L = ichol (A2, opts); +%! assert (norm (A2 - L*L', "fro") / norm (A2, "fro"), 0.0893, 1e-4) +%! opts.michol = "on"; +%! L = ichol (A2, opts); +%! assert (norm (A2 - L*L', "fro") / norm (A2, "fro"), 0.2377, 1e-4) +%! +%!test +%! opts.type = "nofill"; +%! opts.michol = "off"; +%! L = ichol (A3, opts); +%! assert (norm (A3 - L*L', "fro") / norm (A3, "fro"), eps, eps); +%! opts.michol = "on"; +%! L = ichol (A3, opts); +%! assert (norm (A3 - L*L', "fro") / norm (A3, "fro"), eps, eps); +%! +%!test +%! opts.type = "nofill"; +%! opts.michol = "off"; +%! L = ichol (A4, opts); +%! assert (norm (A4 - L*L', "fro") / norm (A4, "fro"), 0.0623, 1e-4); +%! opts.michol = "on"; +%! L = ichol (A4, opts); +%! assert (norm (A4 - L*L', "fro") / norm (A4, "fro"), 0.1664, 1e-4); %! %!test %! opts.type = "nofill"; %! opts.michol = "off"; +%! L = ichol (A5, opts); +%! assert (norm (A5 - L*L', "fro") / norm (A5, "fro"), 0.0195, 1e-4); +%! opts.michol = "on"; +%! L = ichol (A5, opts); +%! assert (norm (A5 - L*L', "fro") / norm (A5, "fro"), 0.0276, 1e-4); +%!test +%% Negative pivot +%!error ichol (A6); +%% Complex entry in the diagonal +%!error ichol (A7); + +%%!ICHOLT tests + +%%!test +%! opts.type = "ict"; +%! opts.droptol = 1e-1; +%! opts.michol = "off"; +%! L = ichol (A1, opts); +%! assert (norm (A1 - L * L', "fro") / norm (A1, "fro"), 0.2065, 1e-4); +%! opts.shape = "upper"; +%! U = ichol (A1, opts); +%! assert (norm (A1 - U' * U, "fro") / norm (A1, "fro"), 0.2065, 1e-4); %! opts.shape = "lower"; %! L = ichol (A1, opts); -%! assert (norm (A1 - L * L', 'fro') / norm (A1, 'fro'), 0.01, 0.01); +%! assert (norm (A1 - L * L', "fro") / norm (A1, "fro"), 0.2065, 1e-4); %! -%!test -%! opts.type = "nofill"; +%%!test +%! opts.type = "ict"; +%! opts.droptol = 1e-1; %! opts.michol = "on"; %! L = ichol (A1, opts); -%! assert (norm (A1 - L * L', 'fro') / norm (A1, 'fro'), 0.02, 0.01); -%! -%!test -%! opts.type = "nofill"; -%! opts.michol = "on"; +%! assert (norm (A1 - L * L', "fro") / norm (A1, "fro"), 0.3266, 1e-4); +%! opts.shape = "upper"; +%! U = ichol (A1, opts); +%! assert (norm (A1 - U' * U, "fro") / norm (A1, "fro"), 0.3266, 1e-4); +%! opts.shape = "lower"; %! opts.diagcomp = 3e-3; %! L = ichol (A1, opts); -%! assert (norm (A1 - L * L', 'fro') / norm (A1, 'fro'), 0.02, 0.01); +%! assert (norm (A1 - L * L', "fro") / norm (A1, "fro"), 0.3266, 1e-4); %! %!test %! opts.type = "ict"; +%! opts.droptol = 1e-1; %! opts.michol = "off"; -%! opts.droptol = 1e-4; -%! L = ichol (A1, opts); -%! assert (norm (A1 - L * L', 'fro') / norm (A1, 'fro'), eps, eps); +%! L = ichol (A2, opts); +%! assert (norm (A2 - L*L', "fro") / norm (A2, "fro"), 0.0893, 1e-4) +%! opts.michol = "on"; +%! L = ichol (A2, opts); +%! assert (norm (A2 - L*L', "fro") / norm (A2, "fro"), 0.2377, 1e-4) +%! +%!test +%! opts.type = "ict"; +%! opts.droptol = 1e-1; +%! opts.michol = "off"; +%! L = ichol (A3, opts); +%! assert (norm (A3 - L*L', "fro") / norm (A3, "fro"), eps, eps); +%! opts.michol = "on"; +%! L = ichol (A3, opts); +%! assert (norm (A3 - L*L', "fro") / norm (A3, "fro"), eps, eps); %! %!test %! opts.type = "ict"; +%! opts.droptol = 1e-1; %! opts.michol = "off"; -%! opts.droptol = 1e-4; -%! L = ichol (A2, opts); -%! assert (norm (A2 - L * L', 'fro') / norm (A2, 'fro'), 5e-4, 5e-4); +%! L = ichol (A4, opts); +%! assert (norm (A4 - L*L', "fro") / norm (A4, "fro"), 0.1224, 1e-4); +%! opts.michol = "on"; +%! L = ichol (A4, opts); +%! assert (norm (A4 - L*L', "fro") / norm (A4, "fro"), 0.2118, 1e-4); +%! +%!test +%! opts.type = "ict"; +%! opts.droptol = 1e-1; +%! opts.michol = "off"; +%! L = ichol (A5, opts); +%! assert (norm (A5 - L*L', "fro") / norm (A5, "fro"), 0.2044, 1e-4); +%! opts.michol = "on"; +%! L = ichol (A5, opts); +%! assert (norm (A5 - L*L', "fro") / norm (A5, "fro"), 0.3231, 1e-4); +%!test +%% Negative pivot +%! opts.type = "ict"; +%!error ichol (A6, setup); +%% Complex entry in the diagonal +%!error ichol (A7, setup); diff -r 168c0aa9bb05 -r df64071e538c scripts/sparse/ilu.m --- a/scripts/sparse/ilu.m Tue Aug 12 15:58:18 2014 +0100 +++ b/scripts/sparse/ilu.m Mon Aug 18 12:32:16 2014 +0100 @@ -162,11 +162,21 @@ print_usage (); endif + % Check input matrix - if (~issparse(A) || ~issquare (A)) + if (~issparse (A) || ~issquare (A)) error ("ilu: Input A must be a sparse square matrix."); endif + % If A is empty and sparse then return empty L, U and P + % Compatibility with Matlab + if (isempty (A)) + L = A; + U = A; + P = A; + return; + endif + % Check input structure, otherwise set default values if (nargin == 2) if (~isstruct (setup)) @@ -231,20 +241,20 @@ % Delegate to specialized ILU switch (setup.type) case "nofill" - [L, U] = ilu0 (A, setup.milu); + [L, U] = __ilu0__ (A, setup.milu); if (nargout == 3) P = speye (length (A)); endif case "crout" - [L, U] = iluc (A, setup.droptol, setup.milu); + [L, U] = __iluc__ (A, setup.droptol, setup.milu); if (nargout == 3) P = speye (length (A)); endif case "ilutp" if (nargout == 2) - [L, U] = ilutp (A, setup.droptol, setup.thresh, setup.milu, setup.udiag); + [L, U] = __ilutp__ (A, setup.droptol, setup.thresh, setup.milu, setup.udiag); elseif (nargout == 3) - [L, U, P] = ilutp (A, setup.droptol, setup.thresh, setup.milu, setup.udiag); + [L, U, P] = __ilutp__ (A, setup.droptol, setup.thresh, setup.milu, setup.udiag); endif otherwise printf ("The input structure is invalid.\n"); @@ -263,46 +273,275 @@ %!test %! setup.type = 'nofill'; %! assert (nnz (ilu (A, setup)), 7840); +%! # This test is taken from the mathworks and should work for full support. %!test -%! # This test is taken from the mathworks and should work for full support. %! setup.type = 'crout'; %! setup.milu = 'row'; %! setup.droptol = dtol; %! [L, U] = ilu (A, setup); -%! e = ones (size (A,2),1); +%! e = ones (size (A, 2),1); %! assert (norm (A*e - L*U*e), 1e-14, 1e-14); %!test %! setup.type = 'crout'; %! setup.droptol = dtol; -%! [L, U] = ilu(A,setup); +%! [L, U] = ilu(A, setup); %! assert (norm (A - L * U, 'fro') / norm (A, 'fro'), 0.05, 1e-2); + +%! # Tests for input validation +%!test +%! [L, U] = ilu (sparse ([])); +%! assert (isempty (L), logical (1)); +%! assert (isempty (U), logical (1)); +%! setup.type = 'crout'; +%! [L, U] = ilu (sparse ([]), setup); +%! assert (isempty (L), logical (1)); +%! assert (isempty (U), logical (1)); +%! setup.type = 'ilutp'; +%! [L, U] = ilu (sparse ([]), setup); +%! assert (isempty (L), logical (1)); +%! assert (isempty (U), logical (1)); +%!error [L, U] = ilu (0); +%!error [L, U] = ilu ([]); +%!error [L, U] = ilu (sparse (0)); +%!test +%! setup.type = 'foo'; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.type = 1; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.type = []; +%!error [L, U] = ilu (A_tiny, setup); +%!test +%! setup.droptol = -1; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.droptol = 0.5i; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.droptol = []; +%!error [L, U] = ilu (A_tiny, setup); +%!test +%! setup.thresh= -1; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.thresh = 0.5i; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.thresh = []; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.thresh = 2; +%!error [L, U] = ilu (A_tiny, setup); +%!test +%! setup.diag = 0.5; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.diag = []; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.diag = -1; +%!error [L, U] = ilu (A_tiny, setup); +%!test +%! setup.milu = 'foo'; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.milu = 1; +%!error [L, U] = ilu (A_tiny, setup); +%! setup.milu = []; +%!error [L, U] = ilu (A_tiny, setup); + +%! # Check if the elements in U satisfy the non-dropping condition. %!test %! setup.type = 'crout'; %! setup.droptol = dtol; %! [L, U] = ilu (A, setup); %! for j = 1:n -%! cmp_value = dtol * norm (A(:, j)) / 2; +%! cmp_value = dtol * norm (A(:, j)); %! non_zeros = nonzeros (U(:, j)); %! for i = 1:length (non_zeros); %! assert (abs (non_zeros (i)) >= cmp_value, logical (1)); %! endfor %! endfor %!test -%! setup.type = 'crout'; +%! setup.type = 'ilutp'; %! setup.droptol = dtol; %! [L, U] = ilu (A, setup); %! for j = 1:n -%! cmp_value = dtol * norm (A(:, j)) / 2; +%! cmp_value = dtol * norm (A(:, j)); %! non_zeros = nonzeros (U(:, j)); %! for i = 1:length (non_zeros); %! assert (abs (non_zeros (i)) >= cmp_value, logical (1)); %! endfor %! endfor + +%! # Check that the complete LU factorisation with crout and ilutp algorithms +%! # output the same result. %!test %! setup.type = 'crout'; %! setup.droptol = 0; %! [L1, U1] = ilu (A, setup); %! setup.type = 'ilutp'; +%! setup.thresh = 0; %! [L2, U2] = ilu (A, setup); %! assert (norm (L1 - L2, 'fro') / norm (L1, 'fro'), 0, eps); %! assert (norm (U1 - U2, 'fro') / norm (U1, 'fro'), 0, eps); + +%! # Tests for real matrices of different sizes for ilu0, iluc and ilutp. +%! # The difference A - L*U should be not greater than eps because with droptol +%! # equaling 0, the LU complete factorization is performed. +%!shared n_tiny, n_small, n_medium, n_large, A_tiny, A_small, A_medium, A_large +%! n_tiny = 5; +%! n_small = 40; +%! n_medium = 600; +%! n_large = 10000; +%! A_tiny = spconvert ([1 4 2 3 3 4 2 5; 1 1 2 3 4 4 5 5; 1 2 3 4 5 6 7 8]'); +%! A_small = sprand (n_small, n_small, 1/n_small) + speye (n_small); +%! A_medium = sprand (n_medium, n_medium, 1/n_medium) + speye (n_medium); +%! A_large = sprand (n_large, n_large, 1/n_large/10) + speye (n_large); +%! +%!test +%! setup.type = "nofill"; +%! [L, U] = ilu (A_tiny); +%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), 0, n_tiny * eps); +%!test +%! setup.type = "nofill"; +%! [L, U] = ilu (A_small); +%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), 0, 1); +%!test +%! setup.type = "nofill"; +%! [L, U] = ilu (A_medium); +%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), 0, 1); +%!test +%! setup.type = "nofill"; +%! [L, U] = ilu (A_large); +%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), 0, 1); +%! +%!test +%! setup.type = "crout"; +%! [L, U] = ilu (A_tiny, setup); +%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), eps, eps); +%!test +%! setup.type = "crout"; +%! [L, U] = ilu (A_small, setup); +%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), eps, eps); +%!test +%! setup.type = "crout"; +%! [L, U] = ilu (A_medium, setup); +%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), eps, eps); +%!test +%! setup.type = "crout"; +%! [L, U] = ilu (A_large, setup); +%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), eps, eps); +%! +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! [L, U] = ilu (A_tiny, setup); +%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), eps, eps); +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! [L, U] = ilu (A_small, setup); +%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), eps, eps); +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! [L, U] = ilu (A_medium, setup); +%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), eps, eps); +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! [L, U] = ilu (A_large, setup); +%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), eps, eps); +%! + +%! # Tests for complex matrices of different sizes for ilu0, iluc and ilutp. +%!shared n_tiny, n_small, n_medium, n_large, A_tiny, A_small, A_medium, A_large +%! n_tiny = 5; +%! n_small = 40; +%! n_medium = 600; +%! n_large = 10000; +%! A_tiny = spconvert ([1 4 2 3 3 4 2 5; 1 1 2 3 4 4 5 5; 1 2 3 4 5 6 7 8]'); +%! A_tiny(1,1) += 1i; +%! A_small = sprand(n_small, n_small, 1/n_small) + ... +%! i * sprand(n_small, n_small, 1/n_small) + speye (n_small); +%! A_medium = sprand(n_medium, n_medium, 1/n_medium) + ... +%! i * sprand(n_medium, n_medium, 1/n_medium) + speye (n_medium); +%! A_large = sprand(n_large, n_large, 1/n_large/10) + ... +%! i * sprand(n_large, n_large, 1/n_large/10) + speye (n_large); +%! +%!test +%! setup.type = "nofill"; +%! [L, U] = ilu (A_tiny); +%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), 0, n_tiny * eps); +%!test +%! setup.type = "nofill"; +%! [L, U] = ilu (A_small); +%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), 0, 1); +%!test +%! setup.type = "nofill"; +%! [L, U] = ilu (A_medium); +%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), 0, 1); +%!test +%! setup.type = "nofill"; +%! [L, U] = ilu (A_large); +%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), 0, 1); +%! +%!test +%! setup.type = "crout"; +%! [L, U] = ilu (A_tiny, setup); +%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), eps, eps); +%!test +%! setup.type = "crout"; +%! [L, U] = ilu (A_small, setup); +%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), eps, eps); +%!test +%! setup.type = "crout"; +%! [L, U] = ilu (A_medium, setup); +%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), eps, eps); +%!test +%! setup.type = "crout"; +%! [L, U] = ilu (A_large, setup); +%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), eps, eps); +%! +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! [L, U] = ilu (A_tiny, setup); +%! assert (norm (A_tiny - L * U, "fro") / norm (A_tiny, "fro"), eps, eps); +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! [L, U] = ilu (A_small, setup); +%! assert (norm (A_small - L * U, "fro") / norm (A_small, "fro"), eps, eps); +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! [L, U] = ilu (A_medium, setup); +%! assert (norm (A_medium - L * U, "fro") / norm (A_medium, "fro"), eps, eps); +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! [L, U] = ilu (A_large, setup); +%! assert (norm (A_large - L * U, "fro") / norm (A_large, "fro"), eps, eps); + +%! #Specific tests for ilutp +%!shared a1, a2 +%! a1 = sparse ([0 0 4 3 1; 5 1 2.3 2 4.5; 0 0 0 2 1;0 0 8 0 2.2; 0 0 9 9 1 ]); +%! a2 = sparse ([3 1 0 0 4; 3 1 0 0 -2;0 0 8 0 0; 0 4 0 4 -4.5; 0 -1 0 0 1]); +%!test +%! setup.udiag = 1; +%! setup.type = "ilutp"; +%! setup.droptol = 0.2; +%! [L, U, P] = ilu (a1, setup); +%! assert (norm (U, "fro"), 17.4577, 1e-4); +%! assert (norm (L, "fro"), 2.4192, 1e-4); +%! setup.udiag = 0; +%!error [L, U, P] = ilu (a1, setup); +%! +%!test +%! setup.type = "ilutp"; +%! setup.droptol = 0; +%! setup.thresh = 0; +%! setup.milu = "row"; +%!error [L, U] = ilu (a2, setup); +%!