Mercurial > octave
diff libinterp/dldfcn/__ichol__.cc @ 19054:df64071e538c
Removed ichol0.cc, icholt.cc, ilu0.cc, iluc.cc, ilutp.cc. Created __ichol__.cc and __ilu__.cc. Minor bugs fixed.
* libinterp/dldfcn/__ichol__.cc: File created now contains __ichol0__ and __icholt__ functions.
* libinterp/dldfcn/__ilu__.cc: File created now contains __ilu0__ __iluc__ and __ilutp__ functions.
* scripts/sparse/ichol.m: Tests have been moved from .cc files to this one.
* changed scripts/sparse/ilu.m: Tests have been moved from .cc files to this one.
author | Eduardo Ramos (edu159) <eduradical951@gmail.com> |
---|---|
date | Mon, 18 Aug 2014 12:32:16 +0100 |
parents | |
children | 38937efbee21 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libinterp/dldfcn/__ichol__.cc Mon Aug 18 12:32:16 2014 +0100 @@ -0,0 +1,552 @@ +/** + * Copyright (C) 2013 Kai T. Ohlhus <k.ohlhus@gmail.com> + * Copyright (C) 2014 Eduardo Ramos Fernández <eduradical951@gmail.com> + * + * This file is part of Octave. + * + * Octave is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation; either version 3 of the License, or (at your option) any later + * version. + * + * Octave is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * Octave; see the file COPYING. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "defun-dld.h" +#include "parse.h" + +// Secondary functions for complex and real case used +// in ichol algorithms. +Complex ichol_mult_complex (Complex a, Complex b) +{ + b.imag (-std::imag (b)); + return a * b; +} + +bool ichol_checkpivot_complex (Complex pivot) +{ + if (pivot.imag () != 0) + { + error ("ichol: Non-real pivot encountered. \ + The matrix must be hermitian."); + return false; + } + else if (pivot.real () < 0) + { + error ("ichol: Non-positive pivot encountered."); + return false; + } + return true; + +} + +bool ichol_checkpivot_real (double pivot) +{ + if (pivot < 0) + { + error ("ichol: Non-positive pivot encountered."); + return false; + } + return true; +} + +double ichol_mult_real (double a, double b) +{ + return a * b; +} + +template <typename octave_matrix_t, typename T, T (*ichol_mult) (T, T), + bool (*ichol_checkpivot) (T)> +void ichol_0 (octave_matrix_t& sm, const std::string michol = "off") +{ + + const octave_idx_type n = sm.cols (); + octave_idx_type j1, jend, j2, jrow, jjrow, j, jw, i, k, jj, Llist_len, r; + T tl; + char opt; + enum {OFF, ON}; + if (michol == "on") + opt = ON; + else + opt = OFF; + + // Input matrix pointers + octave_idx_type* cidx = sm.cidx (); + octave_idx_type* ridx = sm.ridx (); + T* data = sm.data (); + + // Working arrays + OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, Llist, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, iw, n); + OCTAVE_LOCAL_BUFFER (T, dropsums, n); + + // Initialize working arrays + for (i = 0; i < n; i++) + { + iw[i] = -1; + Llist[i] = -1; + Lfirst[i] = -1; + dropsums[i] = 0; + } + + // Main loop + for (k = 0; k < n; k++) + { + j1 = cidx[k]; + j2 = cidx[k+1]; + for (j = j1; j < j2; j++) + iw[ridx[j]] = j; + + jrow = Llist [k]; + // Iterate over each non-zero element in the actual row. + while (jrow != -1) + { + jjrow = Lfirst[jrow]; + jend = cidx[jrow+1]; + for (jj = jjrow; jj < jend; jj++) + { + r = ridx[jj]; + jw = iw[r]; + tl = ichol_mult (data[jj], data[jjrow]); + if (jw != -1) + data[jw] -= tl; + else + // Because the simetry of the matrix we know the drops + // in the column r are also in the column k. + if (opt == ON) + { + dropsums[r] -= tl; + dropsums[k] -= tl; + } + } + // Update the linked list and the first entry of the + // actual column. + if ((jjrow + 1) < jend) + { + Lfirst[jrow]++; + j = jrow; + jrow = Llist[jrow]; + Llist[j] = Llist[ridx[Lfirst[j]]]; + Llist[ridx[Lfirst[j]]] = j; + } + else + jrow = Llist[jrow]; + } + + if (opt == ON) + data[j1] += dropsums[k]; + + if (ridx[j1] != k) + { + error ("ichol: There is a pivot equal to zero."); + break; + } + + if (! ichol_checkpivot (data[j1])) + break; + + data[cidx[k]] = std::sqrt (data[j1]); + + // Update Llist and Lfirst with the k-column information. + // Also scale the column elements by the pivot and reset + // the working array iw. + if (k < (n - 1)) + { + iw[ridx[j1]] = -1; + for(i = j1 + 1; i < j2; i++) + { + iw[ridx[i]] = -1; + data[i] /= data[j1]; + } + Lfirst[k] = j1; + if ((Lfirst[k] + 1) < j2) + { + Lfirst[k]++; + jjrow = ridx[Lfirst[k]]; + Llist[k] = Llist[jjrow]; + Llist[jjrow] = k; + } + } + } +} + +DEFUN_DLD (__ichol0__, args, nargout, "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {@var{L} =} __ichol0__ (@var{A})\n\ +@deftypefnx {Loadable Function} {@var{L} =} __ichol0__ (@var{A}, @var{michol})\n\ +Undocumented internal function.\n\ +@end deftypefn") + +{ + octave_value_list retval; + + int nargin = args.length (); + std::string michol = "off"; + + + if (nargout > 1 || nargin < 1 || nargin > 2) + { + print_usage (); + return retval; + } + + if (args(0).is_scalar_type () || ! args(0).is_sparse_type ()) + error ("__ichol0__: 1. parameter must be a sparse square matrix."); + + if (args(0).is_empty ()) + { + retval(0) = octave_value (SparseMatrix ()); + return retval; + } + + + if (nargin == 2) + { + michol = args(1).string_value (); + if (error_state || ! (michol == "on" || michol == "off")) + error ("__ichol0__: 2. parameter must be 'on' or 'off' character string."); + } + + + if (!error_state) + { + // In ICHOL0 algorithm the zero-pattern of the input matrix is preserved so + // it's structure does not change during the algorithm. The same input + // matrix is used to build the output matrix due to that fact. + octave_value_list param_list; + if (!args(0).is_complex_type ()) + { + SparseMatrix sm = args(0).sparse_matrix_value (); + param_list.append (sm); + sm = feval ("tril", param_list)(0).sparse_matrix_value (); + ichol_0 <SparseMatrix, double, ichol_mult_real, ichol_checkpivot_real> (sm, michol); + if (! error_state) + retval(0) = octave_value (sm); + } + else + { + SparseComplexMatrix sm = args(0).sparse_complex_matrix_value (); + param_list.append (sm); + sm = feval ("tril", param_list)(0).sparse_complex_matrix_value (); + ichol_0 <SparseComplexMatrix, Complex, ichol_mult_complex, ichol_checkpivot_complex> (sm, michol); + if (! error_state) + retval(0) = octave_value (sm); + } + + } + + return retval; +} + +template <typename octave_matrix_t, typename T, T (*ichol_mult) (T, T), + bool (*ichol_checkpivot) (T)> +void ichol_t (const octave_matrix_t& sm, octave_matrix_t& L, const T* cols_norm, + const T droptol, const std::string michol = "off") + +{ + + const octave_idx_type n = sm.cols (); + octave_idx_type j, jrow, jend, jjrow, jw, i, k, jj, Llist_len, total_len, w_len, + max_len, ind; + + char opt; + enum {OFF, ON}; + if (michol == "on") + opt = ON; + else + opt = OFF; + + // Input matrix pointers + octave_idx_type* cidx = sm.cidx (); + octave_idx_type* ridx = sm.ridx (); + T* data = sm.data (); + + // Output matrix data structures. Because it is not known the + // final zero pattern of the output matrix due to fill-in elements, + // an heuristic approach has been adopted for memory allocation. The + // size of ridx_out_l and data_out_l is incremented 10% of their actual + // size (nnz(A) in the beginning). If that amount is less than n, their + // size is just incremented in n elements. This way the number of + // reallocations decrease throughout the process, obtaining a good performance. + max_len = sm.nnz (); + max_len += (0.1 * max_len) > n ? 0.1 * max_len : n; + Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1)); + octave_idx_type* cidx_l = cidx_out_l.fortran_vec (); + Array <octave_idx_type> ridx_out_l (dim_vector (max_len ,1)); + octave_idx_type* ridx_l = ridx_out_l.fortran_vec (); + Array <T> data_out_l (dim_vector (max_len, 1)); + T* data_l = data_out_l.fortran_vec (); + + // Working arrays + OCTAVE_LOCAL_BUFFER (T, w_data, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, Llist, n); + OCTAVE_LOCAL_BUFFER (T, col_drops, n); + std::vector <octave_idx_type> vec; + vec.resize (n); + + + T zero = T (0); + cidx_l[0] = cidx[0]; + for (i = 0; i < n; i++) + { + Llist[i] = -1; + Lfirst[i] = -1; + w_data[i] = 0; + col_drops[i] = zero; + vec[i] = 0; + } + + total_len = 0; + for (k = 0; k < n; k++) + { + ind = 0; + for (j = cidx[k]; j < cidx[k+1]; j++) + { + w_data[ridx[j]] = data[j]; + if (ridx[j] != k) + { + vec[ind] = ridx[j]; + ind++; + } + } + jrow = Llist[k]; + while (jrow != -1) + { + jjrow = Lfirst[jrow]; + jend = cidx_l[jrow+1]; + for (jj = jjrow; jj < jend; jj++) + { + j = ridx_l[jj]; + // If the element in the j position of the row is zero, + // then it will become non-zero, so we add it to the + // vector that keeps track of non-zero elements in the working row. + if (w_data[j] == zero) + { + vec[ind] = j; + ind++; + } + w_data[j] -= ichol_mult (data_l[jj], data_l[jjrow]); + + } + // Update the actual column first element and update the + // linked list of the jrow row. + if ((jjrow + 1) < jend) + { + Lfirst[jrow]++; + j = jrow; + jrow = Llist[jrow]; + Llist[j] = Llist[ridx_l[Lfirst[j]]]; + Llist[ridx_l[Lfirst[j]]] = j; + } + else + jrow = Llist[jrow]; + } + + // Resizing output arrays + if ((max_len - total_len) < n) + { + max_len += (0.1 * max_len) > n ? 0.1 * max_len : n; + data_out_l.resize (dim_vector (max_len, 1)); + data_l = data_out_l.fortran_vec (); + ridx_out_l.resize (dim_vector (max_len, 1)); + ridx_l = ridx_out_l.fortran_vec (); + } + + // The sorting of the non-zero elements of the working column can be + // handled in a couple of ways. The most efficient two I found, are + // keeping the elements in an ordered binary search tree dinamically + // or keep them unsorted in a vector and at the end of the outer + // iteration order them. The last approach exhibit lower execution + // times. + std::sort (vec.begin (), vec.begin () + ind); + + data_l[total_len] = w_data[k]; + ridx_l[total_len] = k; + w_len = 1; + + // Extract then non-zero elements of working column and drop the + // elements that are lower than droptol * cols_norm[k]. + for (i = 0; i < ind ; i++) + { + jrow = vec[i]; + if (w_data[jrow] != zero) + { + if (std::abs (w_data[jrow]) < (droptol * cols_norm[k])) + { + if (opt == ON) + { + col_drops[k] += w_data[jrow]; + col_drops[jrow] += w_data[jrow]; + } + } + else + { + data_l[total_len + w_len] = w_data[jrow]; + ridx_l[total_len + w_len] = jrow; + w_len++; + } + vec[i] = 0; + } + w_data[jrow] = zero; + } + + // Compensate column sums --> michol option + if (opt == ON) + data_l[total_len] += col_drops[k]; + + if (data_l[total_len] == zero) + { + error ("ichol: There is a pivot equal to zero."); + break; + } + else if (! ichol_checkpivot (data_l[total_len])) + break; + + // Once the elements are dropped and compensation of columns + // sums are done, scale the elements by the pivot. + data_l[total_len] = std::sqrt (data_l[total_len]); + for (jj = total_len + 1; jj < (total_len + w_len); jj++) + data_l[jj] /= data_l[total_len]; + total_len += w_len; + // Check if there are too many elements to be indexed with octave_idx_type + // type due to fill-in during the process. + if (total_len < 0) + { + error ("ichol: Integer overflow. Too many fill-in elements in L"); + break; + } + cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len; + + // Update Llist and Lfirst with the k-column information. + if (k < (n - 1)) + { + Lfirst[k] = cidx_l[k]; + if ((Lfirst[k] + 1) < cidx_l[k+1]) + { + Lfirst[k]++; + jjrow = ridx_l[Lfirst[k]]; + Llist[k] = Llist[jjrow]; + Llist[jjrow] = k; + } + } + + } + + if (! error_state) + { + // Build the output matrices + L = octave_matrix_t (n, n, total_len); + for (i = 0; i <= n; i++) + L.cidx (i) = cidx_l[i]; + for (i = 0; i < total_len; i++) + { + L.ridx (i) = ridx_l[i]; + L.data (i) = data_l[i]; + } + } + +} + +DEFUN_DLD (__icholt__, args, nargout, "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {@var{L} =} __icholt__ (@var{A})\n\ +@deftypefnx {Loadable Function} {@var{L} =} __icholt__ (@var{A}, @var{droptol})\n\ +@deftypefnx {Loadable Function} {@var{L} =} __icholt__ (@var{A}, @var{droptol}, @var{michol})\n\ +Undocumented internal function.\n\ +@end deftypefn") +{ + octave_value_list retval; + int nargin = args.length (); + // Default values of parameters + std::string michol = "off"; + double droptol = 0; + + + if (nargout > 1 || nargin < 1 || nargin > 3) + { + print_usage (); + return retval; + } + + if (args(0).is_scalar_type () || ! args(0).is_sparse_type ()) + error ("__icholt__: 1. parameter must be a sparse square matrix."); + + if (args(0).is_empty ()) + { + retval(0) = octave_value (SparseMatrix ()); + return retval; + } + + if (! error_state && (nargin >= 2)) + { + droptol = args(1).double_value (); + if (error_state || (droptol < 0) || ! args(1).is_real_scalar ()) + error ("__icholt__: 2. parameter must be a positive real scalar."); + } + + if (! error_state && (nargin == 3)) + { + michol = args(2).string_value (); + if (error_state || !(michol == "on" || michol == "off")) + error ("__icholt__: 3. parameter must be 'on' or 'off' character string."); + } + + if (!error_state) + { + octave_value_list param_list; + if (! args(0).is_complex_type ()) + { + Array <double> cols_norm; + SparseMatrix L; + param_list.append (args(0).sparse_matrix_value ()); + SparseMatrix sm_l = feval ("tril", + param_list)(0).sparse_matrix_value (); + param_list(0) = sm_l; + param_list(1) = 1; + param_list(2) = "cols"; + cols_norm = feval ("norm", param_list)(0).vector_value (); + param_list.clear (); + ichol_t <SparseMatrix, + double, ichol_mult_real, ichol_checkpivot_real> + (sm_l, L, cols_norm.fortran_vec (), droptol, michol); + if (! error_state) + retval(0) = octave_value (L); + } + else + { + Array <Complex> cols_norm; + SparseComplexMatrix L; + param_list.append (args(0).sparse_complex_matrix_value ()); + SparseComplexMatrix sm_l = feval ("tril", + param_list)(0).sparse_complex_matrix_value (); + param_list(0) = sm_l; + param_list(1) = 1; + param_list(2) = "cols"; + cols_norm = feval ("norm", param_list)(0).complex_vector_value (); + param_list.clear (); + ichol_t <SparseComplexMatrix, + Complex, ichol_mult_complex, ichol_checkpivot_complex> + (sm_l, L, cols_norm.fortran_vec (), Complex (droptol), michol); + if (! error_state) + retval(0) = octave_value (L); + } + + } + + return retval; +} + +/* +## No test needed for internal helper function. +%!assert (1) +*/