view libinterp/dldfcn/icholt.cc @ 19053:168c0aa9bb05

Added all the files related with ilu.m and ichol.m functions. * ichol0.cc: New file added to libinterp/dldfcn * icholt.cc: New file added to libinterp/dldfcn * ilu0.cc: New file added to libinterp/dldfcn * iluc.cc: New file added to libinterp/dldfcn * ilutp.cc: New file added to libinterp/dldfcn * ichol.m: New file added to libinterp/dldfcn. Wrapper for ichol0 and icholt. * ilu.m: New file added to libinterp/dldfcn. Wrapper for ilu0, iluc and ilutp. * module-files: Added the above files to allow their compilation.
author Eduardo Ramos (edu159) <eduradical951@gmail.com>
date Tue, 12 Aug 2014 15:58:18 +0100
parents
children
line wrap: on
line source

/**
 * Copyright (C) 2014 Eduardo Ramos Fernández <eduradical951@gmail.com>
 *
 * This file is part of Octave.
 *
 * Octave is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 3 of the License, or (at your option) any later
 * version.
 *
 * Octave is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Octave; see the file COPYING.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "defun-dld.h"
#include "parse.h"

// Secondary functions specialiced for complex or real case used
// in icholt algorithms.
template < typename T > inline T
ichol_mult_complex (T a, T b)
{
  b.imag (-std::imag (b));
  return a * b;
}

template < typename T > inline bool
ichol_checkpivot_complex (T pivot)
{
  if (pivot.imag () != 0)
    {
      error ("icholt: Non-real pivot encountered. \
              The matrix must be hermitian");
      return false;
    }
  else if (pivot.real () < 0)
    {
      error ("icholt: Non-positive pivot encountered.");
      return false;
    }
  return true;

}

template < typename T > inline bool
ichol_checkpivot_real (T pivot)
{
  if (pivot < T (0))
    {
      error ("icholt: Non-positive pivot encountered.");
      return false;
    }
  return true;
}

template < typename T> inline T 
ichol_mult_real (T a, T b)
{
  return a * b;
}


template <typename octave_matrix_t, typename T,  T (*ichol_mult) (T, T), 
          bool (*ichol_checkpivot) (T)>
void ichol_t (const octave_matrix_t& sm, octave_matrix_t& L, const T* cols_norm,
              const T droptol, const std::string michol = "off")
              
{

  const octave_idx_type n = sm.cols ();
  octave_idx_type j, jrow, jend, jjrow, jw, i, k, jj, Llist_len, total_len, w_len,
                  max_len, ind;

  char opt;
  enum {OFF, ON};
  if (michol == "on")
    opt = ON;
  else
    opt = OFF;

  // Input matrix pointers
  octave_idx_type* cidx = sm.cidx ();
  octave_idx_type* ridx = sm.ridx ();
  T* data = sm.data ();

  // Output matrix data structures. Because it is not known the 
  // final zero pattern of the output matrix due to fill-in elements,
  // an heuristic approach has been adopted for memory allocation. The 
  // size of ridx_out_l and data_out_l is incremented 10% of their actual
  // size (nnz(A) in the beginning).  If that amount is less than n, their
  // size is just incremented in n elements. This way the number of
  // reallocations decrease throughout the process, obtaining a good performance.
  max_len = sm.nnz ();
  max_len += (0.1 * max_len) > n ? 0.1 * max_len : n;
  Array <octave_idx_type> cidx_out_l (dim_vector (n + 1,1));
  octave_idx_type* cidx_l = cidx_out_l.fortran_vec ();
  Array <octave_idx_type> ridx_out_l (dim_vector (max_len ,1));
  octave_idx_type* ridx_l = ridx_out_l.fortran_vec ();
  Array <T> data_out_l (dim_vector (max_len, 1));
  T* data_l = data_out_l.fortran_vec ();

  // Working arrays
  OCTAVE_LOCAL_BUFFER (T, w_data, n);
  OCTAVE_LOCAL_BUFFER (octave_idx_type, Lfirst, n);
  OCTAVE_LOCAL_BUFFER (octave_idx_type, Llist, n);
  OCTAVE_LOCAL_BUFFER (T, col_drops, n);
  std::vector <octave_idx_type> vec;
  vec.resize (n);


  T zero = T (0);
  cidx_l[0] = cidx[0];
  for (i = 0; i < n; i++)
    {
      Llist[i] = -1;
      Lfirst[i] = -1;
      w_data[i] = 0;
      col_drops[i] = zero;
      vec[i] = 0;
    }

  total_len = 0;
  for (k = 0; k < n; k++)
    {
      ind = 0;
      for (j = cidx[k]; j < cidx[k+1]; j++)
        {
          w_data[ridx[j]] = data[j];
          if (ridx[j] != k)
            {
              vec[ind] = ridx[j];
              ind++;
            }
        }
      jrow = Llist[k];
      while (jrow != -1) 
        {
          jjrow = Lfirst[jrow];
          jend = cidx_l[jrow+1];
          for (jj = jjrow; jj < jend; jj++)
            {
              j = ridx_l[jj];
              // If the element in the j position of the row is zero,
              // then it will become non-zero, so we add it to the 
              // vector that keeps track of non-zero elements in the working row.
              if (w_data[j] == zero)
                {
                  vec[ind] = j; 
                  ind++;
                }
              w_data[j] -=  ichol_mult (data_l[jj], data_l[jjrow]);

            }
          // Update the actual column first element and update the 
          // linked list of the jrow row.
          if ((jjrow + 1) < jend)
            {
              Lfirst[jrow]++;
              j = jrow;
              jrow = Llist[jrow];
              Llist[j] = Llist[ridx_l[Lfirst[j]]];
              Llist[ridx_l[Lfirst[j]]] = j;
            }
          else
            jrow = Llist[jrow];
        }

      // Resizing output arrays
      if ((max_len - total_len) < n)
        {
          max_len += (0.1 * max_len) > n ? 0.1 * max_len : n;
          data_out_l.resize (dim_vector (max_len, 1));
          data_l = data_out_l.fortran_vec ();
          ridx_out_l.resize (dim_vector (max_len, 1));
          ridx_l = ridx_out_l.fortran_vec ();
        }
      
      // The sorting of the non-zero elements of the working column can be
      // handled in a couple of ways. The most efficient two I found, are 
      // keeping the elements in an ordered binary search tree dinamically 
      // or keep them unsorted in a vector and at the end of the outer 
      // iteration order them. The last approach exhibit lower execution 
      // times.   
      std::sort (vec.begin (), vec.begin () + ind);

      data_l[total_len] = w_data[k];
      ridx_l[total_len] = k;
      w_len = 1;

      // Extract then non-zero elements of working column and drop the
      // elements that are lower than droptol * cols_norm[k].
      for (i = 0; i < ind ; i++)
        {
          jrow = vec[i];
          if (w_data[jrow] != zero)
            {
              if (std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
                {
                  if (opt == ON)
                    {
                      col_drops[k] += w_data[jrow];
                      col_drops[jrow] += w_data[jrow];
                    }
                }
              else
                {
                  data_l[total_len + w_len] = w_data[jrow];
                  ridx_l[total_len + w_len] = jrow;
                  w_len++;
                }
              vec[i] = 0;
            }
          w_data[jrow] = zero;
        }

      // Compensate column sums --> michol option
      if (opt == ON)
        data_l[total_len] += col_drops[k];

      if (data_l[total_len] == zero)
        {
          error ("icholt: There is a pivot equal to zero.");
          break;
        }
      else if (!ichol_checkpivot (data_l[total_len]))
        break;

      // Once the elements are dropped and compensation of columns 
      // sums are done, scale the elements by the pivot.
      data_l[total_len] = std::sqrt (data_l[total_len]);
      for (jj = total_len + 1; jj < (total_len + w_len); jj++)
        data_l[jj] /=  data_l[total_len];
      total_len += w_len;
      cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len;

      // Update Llist and Lfirst with the k-column information.
      if (k < (n - 1)) 
        {
          Lfirst[k] = cidx_l[k];
          if ((Lfirst[k] + 1) < cidx_l[k+1])
            {
              Lfirst[k]++;
              jjrow = ridx_l[Lfirst[k]];
              Llist[k] = Llist[jjrow];
              Llist[jjrow] = k;
            }
        }
        
      }

  if (! error_state)
    {
      // Build the output matrices
      L = octave_matrix_t (n, n, total_len);
      for (i = 0; i <= n; i++)
        L.cidx (i) = cidx_l[i];
      for (i = 0; i < total_len; i++)
        {
          L.ridx (i) = ridx_l[i];
          L.data (i) = data_l[i];
        }
    }

}

DEFUN_DLD (icholt, args, nargout, "-*- texinfo -*-\n\
@deftypefn  {Loadable Function} {@var{L} =} icholt (@var{A}, @var{droptol}, @var{michol})\n\
\n\
Computes the thresholded Incomplete Cholesky factorization [ICT] of A \
which must be an square hermitian matrix in the complex case and a symmetric \
positive definite matrix in the real one. \
\n\
@code{[@var{L}] = icholt (@var{A}, @var{droptol}, @var{michol})} \
computes the ICT of @var{A}, such that @code{@var{L} * @var{L}'} is an \
approximation of the square sparse hermitian matrix @var{A}. @var{droptol} is \
a non-negative scalar used as a drop tolerance when performing ICT. Elements \
which are smaller in magnitude than @code{@var{droptol} * norm(@var{A}(j:end, j), 1)} \
, are dropped from the resulting factor @var{L}. The parameter @var{michol} \
decides whether the Modified IC(0) should be performed. This compensates the \
main diagonal of @var{L}, such that @code{@var{A} * @var{e} = @var{L} * @var{L}' \
 * @var{e}} with @code{@var{e} = ones (size (@var{A}, 2), 1))} holds. \n\
\n\
For more information about the algorithms themselves see:\n\
\n\
[1] Saad, Yousef. \"Preconditioning Techniques.\" Iterative Methods for Sparse Linear \
Systems. PWS Publishing Company, 1996. \
\n\
\n\
[2] Jones, Mark T. and Plassmann, Paul E.: An Improved Incomplete Cholesky \
Factorization (1992). \
\n\
@seealso{ichol, ichol0, chol, ilu}\n\
@end deftypefn")
{
  octave_value_list retval;

  int nargin = args.length ();
  // Default values of parameters
  std::string michol = "off";
  double droptol = 0;
 

  if (nargout > 1 || nargin < 1 || nargin > 3)
    {
      print_usage ();
      return retval;
    }

  if (args (0).is_scalar_type () || !args (0).is_sparse_type ())
    error ("icholt: 1. parameter must be a sparse square matrix.");

  if (args (0).is_empty ())
    {
      retval (0) = octave_value (SparseMatrix ());
      return retval;
    }

  if (! error_state && (nargin >= 2))
    {
      droptol = args (1).double_value ();
      if (error_state || (droptol < 0) || ! args (1).is_real_scalar ())
        error ("icholt: 2. parameter must be a positive real scalar.");
    }

  if (! error_state && (nargin == 3))
    {
      michol = args (2).string_value ();
      if (error_state || !(michol == "on" || michol == "off"))
        error ("icholt: 3. parameter must be 'on' or 'off' character string.");
    }

  if (!error_state)
    {
      octave_value_list param_list;
      if (! args (0).is_complex_type ())
        {
          Array <double> cols_norm;
          SparseMatrix L;
          param_list.append (args (0).sparse_matrix_value ());
          SparseMatrix sm_l = feval ("tril", 
                                     param_list) (0).sparse_matrix_value (); 
          param_list (0) = sm_l;
          param_list (1) = 1;
          param_list (2) = "cols";
          cols_norm = feval ("norm", param_list) (0).vector_value ();
          param_list.clear ();
          ichol_t <SparseMatrix, 
                   double, ichol_mult_real, ichol_checkpivot_real> 
                   (sm_l, L, cols_norm.fortran_vec (), droptol, michol);
          if (! error_state)
            retval (0) = octave_value (L);
        }
      else
        {
          Array <Complex> cols_norm;
          SparseComplexMatrix L;
          param_list.append (args (0).sparse_complex_matrix_value ());
          SparseComplexMatrix sm_l = feval ("tril", 
                                            param_list) (0).sparse_complex_matrix_value (); 
          param_list (0) = sm_l;
          param_list (1) = 1;
          param_list (2) = "cols";
          cols_norm = feval ("norm", param_list) (0).complex_vector_value ();
          param_list.clear ();
          ichol_t <SparseComplexMatrix, 
                   Complex, ichol_mult_complex, ichol_checkpivot_complex> 
                   (sm_l, L, cols_norm.fortran_vec (), Complex (droptol), michol);
          if (! error_state)
            retval (0) = octave_value (L);
        }

    }

  return retval;
}

/*
%% Real matrices
%!shared A_1, A_2, A_3, A_4, A_5
%! A_1 = [ 0.37, -0.05,  -0.05,  -0.07;
%!        -0.05,  0.116,  0.0,   -0.05;
%!        -0.05,  0.0,    0.116, -0.05;
%!        -0.07, -0.05,  -0.05,   0.202];
%! A_1 = sparse(A_1);
%!
%! A_2 = gallery ('poisson', 30);
%!
%! A_3 = gallery ('tridiag', 50);
%!
%! nx = 400; ny = 200;
%! hx = 1 / (nx + 1); hy = 1 / (ny + 1);
%! Dxx = spdiags ([ones(nx, 1), -2 * ones(nx, 1), ones(nx, 1)], [-1 0 1 ], nx, nx) / (hx ^ 2);
%! Dyy = spdiags ([ones(ny, 1), -2 * ones(ny, 1), ones(ny, 1)], [-1 0 1 ], ny, ny) / (hy ^ 2);
%! A_4 = -kron (Dxx, speye (ny)) - kron (speye (nx), Dyy);
%! A_4 = sparse (A_4);
%!
%! A_5 = [ 0.37, -0.05,          -0.05,  -0.07;
%!        -0.05,  0.116,          0.0,   -0.05 + 0.05i;
%!        -0.05,  0.0,            0.116, -0.05;
%!        -0.07, -0.05 - 0.05i,  -0.05,   0.202];
%! A_5 = sparse(A_5);
%! A_6 = [ 0.37,    -0.05 - i, -0.05,  -0.07;
%!        -0.05 + i, 0.116,     0.0,   -0.05;
%!        -0.05,     0.0,       0.116, -0.05;
%!        -0.07,    -0.05,     -0.05,   0.202];
%! A_6 = sparse(A_6);
%! A_7 = A_5;
%! A_7(1) = 2i;
%!
%!test
%!error icholt ([]);
%!error icholt ([],[]);
%!error icholt ([],[],[]);
%!error [~] = icholt ([],[],[]);
%!error [L] = icholt ([],[],[]);
%!error [L] = icholt ([], 1e-4, 1);
%!error [L] = icholt (A_1, [], 'off');
%!error [L] = icholt (A_1, 1e-4, []);
%!error [L, E] = icholt (A_1, 1e-4, 'off');
%!error [L] = icholt (A_1, 1e-4, 'off', A_1);
%!error icholt (sparse (0), 1e-4, 'off');
%!error icholt (sparse (-0), 1e-4, 'off');
%!error icholt (sparse (-1), 1e-4, 'off');
%!error icholt (sparse (i), 1e-4, 'off');
%!error icholt (sparse (-i), 1e-4, 'off');
%!error icholt (sparse (1 + 1i), 1e-4, 'off');
%!error icholt (sparse (1 - 1i), 1e-4, 'off');
%!
%!test
%! L = icholt (sparse (1), 1e-4, 'off');
%! assert (L, sparse (1));
%! L = icholt (sparse (4), 1e-4, 'off');
%! assert (L, sparse (2));
%!
%!test
%! L = icholt (A_1, 1e-4, 'off');
%! assert (norm (A_1 - L*L', 'fro') / norm (A_1, 'fro'), eps, eps);
%! L = icholt (A_1, 1e-4, 'on');
%! assert (norm (A_1 - L*L', 'fro') / norm (A_1, 'fro'), eps, eps);
%!
%!test
%! L = icholt (A_2, 1e-4, 'off');
%! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 1e-4, 1e-4);
%! L = icholt (A_2, 1e-4, 'on');
%! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 3e-4, 1e-4);
%!
%!test
%! L = icholt (A_3, 1e-4, 'off');
%! assert (norm (A_3 - L*L', 'fro') / norm (A_3, 'fro'), eps, eps);
%! L = icholt (A_3, 1e-4, 'on');
%! assert (norm (A_3 - L*L', 'fro') / norm (A_3, 'fro'), eps, eps);
%!
%!test
%! L = icholt (A_4, 1e-4, 'off');
%! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 2e-4, 1e-4);
%! L = icholt (A_4, 1e-4, 'on');
%! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 7e-4, 1e-4);
%!
%% Complex matrices
%!test
%! L = ichol0 (A_5, 'off');
%! assert (norm (A_5 - L*L', 'fro') / norm (A_5, 'fro'), 1e-2, 1e-2);
%! L = ichol0 (A_5, 'on');
%! assert (norm (A_5 - L*L', 'fro') / norm (A_5, 'fro'), 2e-2, 1e-2);
%% Negative pivot 
%!error ichol0 (A_6, 'off');
%% Complex entry in the diagonal
%!error ichol0 (A_7, 'off');
*/