octave-nkf: src/DLD-FUNCTIONS/sqrtm.cc comparison

comparison src/DLD-FUNCTIONS/sqrtm.cc @ 10608:f9860b622680

improve sqrtm

author	Jaroslav Hajek <highegg@gmail.com>
date	Thu, 06 May 2010 13:32:08 +0200
parents	d0ce5e973937
children	9f0a264d2f60

comparison

equal deleted inserted replaced

-:f7501986e42d
+:f9860b622680
 /*
 Copyright (C) 2001, 2003, 2005, 2006, 2007, 2008 Ross Lippert and Paul Kienzle
+Copyright (C) 2010 VZLU Prague
 This file is part of Octave.
 Octave is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
 #include "CmplxSCHUR.h"
 #include "fCmplxSCHUR.h"
 #include "lo-ieee.h"
 #include "lo-mappers.h"
+#include "oct-norm.h"
 #include "defun-dld.h"
 #include "error.h"
 #include "gripes.h"
 #include "utils.h"
+#include "xnorm.h"
-template <class T>
-static inline T
+template <class Matrix>
-getmin (T x, T y)
+static void
+sqrtm_utri_inplace (Matrix& T)
 {
-return x < y ? x : y;
+typedef typename Matrix::element_type element_type;
+const element_type zero = element_type ();
+bool singular = false;
+/*
+* Replace the upper triangular part of T by its square root, in place.
+* Only elements on and above the diagonal are read or written.
+*
+* The pointer loops below are equivalent to this Octave code (the
+* vectorized update over k is the third, innermost loop):
+*
+*  n = rows (T);
+*  for j = 1:n
+*    T(j,j) = sqrt (T(j,j));
+*    for i = j-1:-1:1
+*      T(i,j) /= (T(i,i) + T(j,j));
+*      k = 1:i-1;
+*      T(k,j) -= T(k,i) * T(i,j);
+*    endfor
+*  endfor
+*
+*  This is an in-place, cache-aligned variant of the code given in
+*  Higham's paper: column j of T starts at Tp + n*j, so the innermost
+*  loop walks down columns i and j with unit stride.
+*
+*  Unlike the Octave code above, a zero diagonal element is not passed
+*  to sqrt; it only sets `singular', and the warning is issued once
+*  after all columns are done.  The division by T(i,i) + T(j,j) is
+*  not guarded against a zero sum.
+*/
+const octave_idx_type n = T.rows ();
+element_type *Tp = T.fortran_vec ();
+for (octave_idx_type j = 0; j < n; j++)
+{
+element_type *colj = Tp + n*j;
+if (colj[j] != zero)
+colj[j] = sqrt (colj[j]);
+else
+singular = true;
+for (octave_idx_type i = j-1; i >= 0; i--)
+{
+const element_type *coli = Tp + n*i;
+const element_type colji = colj[i] /= (coli[i] + colj[j]);
+for (octave_idx_type k = 0; k < i; k++)
+colj[k] -= coli[k] * colji;
+}
+}
+if (singular)
+warning ("sqrtm: matrix is singular, may not have a square root");
 }
-template <class T>
+template <class Matrix, class ComplexMatrix, class ComplexSCHUR>
-static inline T
+static octave_value
-getmax (T x, T y)
+do_sqrtm (const octave_value& arg)
 {
-return x > y ? x : y;
-}
+octave_value retval;
-static double
+MatrixType mt = arg.matrix_type ();
-frobnorm (const ComplexMatrix& A)
-{
+bool iscomplex = arg.is_complex_type ();
-double sum = 0;
+typedef typename Matrix::element_type real_type;
-for (octave_idx_type i = 0; i < A.rows (); i++)
-for (octave_idx_type j = 0; j < A.columns (); j++)
+real_type cutoff = 0, one = 1;
-sum += real (A(i,j) * conj (A(i,j)));
+real_type eps = std::numeric_limits<real_type>::epsilon ();
-return sqrt (sum);
+if (! iscomplex)
-}
+{
+Matrix x = octave_value_extract<Matrix> (arg);
-static double
-frobnorm (const Matrix& A)
+if (mt.is_unknown ()) // Matrix type not determined yet: detect it from the real data and hand it to arg.matrix_type ().
-{
+arg.matrix_type (mt = MatrixType (x));
-double sum = 0;
-for (octave_idx_type i = 0; i < A.rows (); i++)
+switch (mt.type ())
-for (octave_idx_type j = 0; j < A.columns (); j++)
-sum += A(i,j) * A(i,j);
-return sqrt (sum);
-}
-static float
-frobnorm (const FloatComplexMatrix& A)
-{
-float sum = 0;
-for (octave_idx_type i = 0; i < A.rows (); i++)
-for (octave_idx_type j = 0; j < A.columns (); j++)
-sum += real (A(i,j) * conj (A(i,j)));
-return sqrt (sum);
-}
-static float
-frobnorm (const FloatMatrix& A)
-{
-float sum = 0;
-for (octave_idx_type i = 0; i < A.rows (); i++)
-for (octave_idx_type j = 0; j < A.columns (); j++)
-sum += A(i,j) * A(i,j);
-return sqrt (sum);
-}
-static ComplexMatrix
-sqrtm_from_schur (const ComplexMatrix& U, const ComplexMatrix& T)
-{
-const octave_idx_type n = U.rows ();
-ComplexMatrix R (n, n, 0.0);
-for (octave_idx_type j = 0; j < n; j++)
-R(j,j) = sqrt (T(j,j));
-const double fudge = sqrt (DBL_MIN);
-for (octave_idx_type p = 0; p < n-1; p++)
-{
-for (octave_idx_type i = 0; i < n-(p+1); i++)
 {
-const octave_idx_type j = i + p + 1;
+case MatrixType::Upper:
+case MatrixType::Diagonal:
-Complex s = T(i,j);
+{
+if (! x.diag ().any_element_is_negative ())
-for (octave_idx_type k = i+1; k < j; k++)
+{
-s -= R(i,k) * R(k,j);
+// No negative diagonal entry, so the triangular recurrence can stay in real arithmetic.
+sqrtm_utri_inplace (x);
-// dividing
+retval = x;
-//     R(i,j) = s/(R(i,i)+R(j,j));
+}
-// screwing around to not / 0
+else
+iscomplex = true;
-const Complex d = R(i,i) + R(j,j) + fudge;
-const Complex conjd = conj (d);
+break;
+}
-R(i,j) =  (s*conjd)/(d*conjd);
+case MatrixType::Lower:
+{
+if (! x.diag ().any_element_is_negative ())
+{
+x = x.transpose ();
+sqrtm_utri_inplace (x);
+retval = x.transpose ();
+}
+else
+iscomplex = true;
+break;
+}
+default:
+{
+iscomplex = true;
+break;
+}
 }
-}
+if (iscomplex)
-return U * R * U.hermitian ();
+cutoff = 10 * x.rows () * eps * xnorm (x, one);
 }
-static FloatComplexMatrix
+if (iscomplex)
-sqrtm_from_schur (const FloatComplexMatrix& U, const FloatComplexMatrix& T)
+{
-{
+ComplexMatrix x = octave_value_extract<ComplexMatrix> (arg);
-const octave_idx_type n = U.rows ();
+if (mt.is_unknown ()) // Matrix type not determined yet: detect it from the complex data and hand it to arg.matrix_type ().
-FloatComplexMatrix R (n, n, 0.0);
+arg.matrix_type (mt = MatrixType (x));
-for (octave_idx_type j = 0; j < n; j++)
+switch (mt.type ())
-R(j,j) = sqrt (T(j,j));
-const float fudge = sqrt (FLT_MIN);
-for (octave_idx_type p = 0; p < n-1; p++)
-{
-for (octave_idx_type i = 0; i < n-(p+1); i++)
 {
-const octave_idx_type j = i + p + 1;
+case MatrixType::Upper:
+case MatrixType::Diagonal:
-FloatComplex s = T(i,j);
+{
+sqrtm_utri_inplace (x);
-for (octave_idx_type k = i+1; k < j; k++)
+retval = x;
-s -= R(i,k) * R(k,j);
+break;
-// dividing
+}
-//     R(i,j) = s/(R(i,i)+R(j,j));
+case MatrixType::Lower:
-// screwing around to not / 0
+{
+x = x.transpose ();
-const FloatComplex d = R(i,i) + R(j,j) + fudge;
+sqrtm_utri_inplace (x);
-const FloatComplex conjd = conj (d);
+retval = x.transpose ();
-R(i,j) =  (s*conjd)/(d*conjd);
+break;
+}
+default:
+{
+ComplexMatrix u;
+do
+{
+ComplexSCHUR schur (x, std::string (), true);
+x = schur.schur_matrix ();
+u = schur.unitary_matrix ();
+}
+while (0); // The do-while (0) only opens a scope, so schur is destroyed here once x and u have been copied out.
+sqrtm_utri_inplace (x);
+x = u * x; // x now holds u * sqrt (T); the triangular factor alone is no longer needed.
+ComplexMatrix res = xgemm (x, u, blas_no_trans, blas_conj_trans);
+if (cutoff > 0 && xnorm (imag (res), one) <= cutoff)
+retval = real (res);
+else
+retval = res;
+break;
+}
 }
 }
-return U * R * U.hermitian ();
+return retval;
 }
 DEFUN_DLD (sqrtm, args, nargout,
 "-*- texinfo -*-\n\
 @deftypefn {Loadable Function} {[@var{result}, @var{error_estimate}] =} sqrtm (@var{a})\n\
 octave_value arg = args(0);
 octave_idx_type n = arg.rows ();
 octave_idx_type nc = arg.columns ();
-int arg_is_empty = empty_arg ("sqrtm", n, nc);
+if (n != nc || arg.ndims () > 2)
-if (arg_is_empty < 0)
-return retval;
-else if (arg_is_empty > 0)
-return octave_value (Matrix ());
-if (n != nc)
 {
 gripe_square_matrix_required ("sqrtm");
 return retval;
 }
-retval(1) = lo_ieee_inf_value ();
+if (arg.is_diag_matrix ())
-retval(0) = lo_ieee_nan_value ();
+{
+// sqrtm of a diagonal matrix is just sqrt.
+retval(0) = arg.sqrt ();
-if (arg.is_single_type ())
+}
-{
+else if (arg.is_single_type ())
-if (arg.is_real_scalar ())
+{
-{
+retval(0) = do_sqrtm<FloatMatrix, FloatComplexMatrix, FloatComplexSCHUR> (arg);
-float d = arg.float_value ();
+}
-if (d > 0.0)
+else if (arg.is_numeric_type ())
 {
-retval(0) = sqrt (d);
+retval(0) = do_sqrtm<Matrix, ComplexMatrix, ComplexSCHUR> (arg);
-retval(1) = 0.0;
+}
-}
-else
+if (nargout > 1 && ! error_state)
 {
-retval(0) = FloatComplex (0.0, sqrt (d));
+// Second output: relative residual in the Frobenius norm.  This corresponds to the generic code
-retval(1) = 0.0;
+//   norm (s*s - x, "fro") / norm (x, "fro");
-}
-}
+octave_value s = retval(0);
-else if (arg.is_complex_scalar ())
+retval(1) = xfrobnorm (s*s - arg) / xfrobnorm (arg);
-{
-FloatComplex c = arg.float_complex_value ();
-retval(0) = sqrt (c);
-retval(1) = 0.0;
-}
-else if (arg.is_matrix_type ())
-{
-float err, minT;
-if (arg.is_real_matrix ())
-{
-FloatMatrix A = arg.float_matrix_value();
-if (error_state)
-return retval;
-// FIXME -- eventually, FloatComplexSCHUR will accept a
-// real matrix arg.
-FloatComplexMatrix Ac (A);
-const FloatComplexSCHUR schur (Ac, std::string ());
-if (error_state)
-return retval;
-const FloatComplexMatrix U (schur.unitary_matrix ());
-const FloatComplexMatrix T (schur.schur_matrix ());
-const FloatComplexMatrix X (sqrtm_from_schur (U, T));
-// Check for minimal imaginary part
-float normX = 0.0;
-float imagX = 0.0;
-for (octave_idx_type i = 0; i < n; i++)
-for (octave_idx_type j = 0; j < n; j++)
-{
-imagX = getmax (imagX, imag (X(i,j)));
-normX = getmax (normX, abs (X(i,j)));
-}
-if (imagX < normX * 100 * FLT_EPSILON)
-retval(0) = real (X);
-else
-retval(0) = X;
-// Compute error
-// FIXME can we estimate the error without doing the
-// matrix multiply?
-err = frobnorm (X*X - FloatComplexMatrix (A)) / frobnorm (A);
-if (xisnan (err))
-err = lo_ieee_float_inf_value ();
-// Find min diagonal
-minT = lo_ieee_float_inf_value ();
-for (octave_idx_type i=0; i < n; i++)
-minT = getmin(minT, abs(T(i,i)));
-}
-else
-{
-FloatComplexMatrix A = arg.float_complex_matrix_value ();
-if (error_state)
-return retval;
-const FloatComplexSCHUR schur (A, std::string ());
-if (error_state)
-return retval;
-const FloatComplexMatrix U (schur.unitary_matrix ());
-const FloatComplexMatrix T (schur.schur_matrix ());
-const FloatComplexMatrix X (sqrtm_from_schur (U, T));
-retval(0) = X;
-err = frobnorm (X*X - A) / frobnorm (A);
-if (xisnan (err))
-err = lo_ieee_float_inf_value ();
-minT = lo_ieee_float_inf_value ();
-for (octave_idx_type i = 0; i < n; i++)
-minT = getmin (minT, abs (T(i,i)));
-}
-retval(1) = err;
-if (nargout < 2)
-{
-if (err > 100*(minT+FLT_EPSILON)*n)
-{
-if (minT == 0.0)
-error ("sqrtm: A is singular, sqrt may not exist");
-else if (minT <= sqrt (FLT_MIN))
-error ("sqrtm: A is nearly singular, failed to find sqrt");
-else
-error ("sqrtm: failed to find sqrt");
-}
-}
-}
-}
-else
-{
-if (arg.is_real_scalar ())
-{
-double d = arg.double_value ();
-if (d > 0.0)
-{
-retval(0) = sqrt (d);
-retval(1) = 0.0;
-}
-else
-{
-retval(0) = Complex (0.0, sqrt (d));
-retval(1) = 0.0;
-}
-}
-else if (arg.is_complex_scalar ())
-{
-Complex c = arg.complex_value ();
-retval(0) = sqrt (c);
-retval(1) = 0.0;
-}
-else if (arg.is_matrix_type ())
-{
-double err, minT;
-if (arg.is_real_matrix ())
-{
-Matrix A = arg.matrix_value();
-if (error_state)
-return retval;
-// FIXME -- eventually, ComplexSCHUR will accept a
-// real matrix arg.
-ComplexMatrix Ac (A);
-const ComplexSCHUR schur (Ac, std::string ());
-if (error_state)
-return retval;
-const ComplexMatrix U (schur.unitary_matrix ());
-const ComplexMatrix T (schur.schur_matrix ());
-const ComplexMatrix X (sqrtm_from_schur (U, T));
-// Check for minimal imaginary part
-double normX = 0.0;
-double imagX = 0.0;
-for (octave_idx_type i = 0; i < n; i++)
-for (octave_idx_type j = 0; j < n; j++)
-{
-imagX = getmax (imagX, imag (X(i,j)));
-normX = getmax (normX, abs (X(i,j)));
-}
-if (imagX < normX * 100 * DBL_EPSILON)
-retval(0) = real (X);
-else
-retval(0) = X;
-// Compute error
-// FIXME can we estimate the error without doing the
-// matrix multiply?
-err = frobnorm (X*X - ComplexMatrix (A)) / frobnorm (A);
-if (xisnan (err))
-err = lo_ieee_inf_value ();
-// Find min diagonal
-minT = lo_ieee_inf_value ();
-for (octave_idx_type i=0; i < n; i++)
-minT = getmin(minT, abs(T(i,i)));
-}
-else
-{
-ComplexMatrix A = arg.complex_matrix_value ();
-if (error_state)
-return retval;
-const ComplexSCHUR schur (A, std::string ());
-if (error_state)
-return retval;
-const ComplexMatrix U (schur.unitary_matrix ());
-const ComplexMatrix T (schur.schur_matrix ());
-const ComplexMatrix X (sqrtm_from_schur (U, T));
-retval(0) = X;
-err = frobnorm (X*X - A) / frobnorm (A);
-if (xisnan (err))
-err = lo_ieee_inf_value ();
-minT = lo_ieee_inf_value ();
-for (octave_idx_type i = 0; i < n; i++)
-minT = getmin (minT, abs (T(i,i)));
-}
-retval(1) = err;
-if (nargout < 2)
-{
-if (err > 100*(minT+DBL_EPSILON)*n)
-{
-if (minT == 0.0)
-error ("sqrtm: A is singular, sqrt may not exist");
-else if (minT <= sqrt (DBL_MIN))
-error ("sqrtm: A is nearly singular, failed to find sqrt");
-else
-error ("sqrtm: failed to find sqrt");
-}
-}
-}
-else
-gripe_wrong_type_arg ("sqrtm", arg);
 }
 return retval;
 }

Mercurial > octave-nkf

comparison src/DLD-FUNCTIONS/sqrtm.cc @ 10608:f9860b622680