Mercurial > octave
diff liboctave/fMatrix.cc @ 7789:82be108cc558
First attempt at single precision tyeps
* * *
corrections to qrupdate single precision routines
* * *
prefer demotion to single over promotion to double
* * *
Add single precision support to log2 function
* * *
Trivial PROJECT file update
* * *
Cache optimized hermitian/transpose methods
* * *
Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author | David Bateman <dbateman@free.fr> |
---|---|
date | Sun, 27 Apr 2008 22:34:17 +0200 |
parents | |
children | f42c6f8d6d8e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,3406 @@ +// Matrix manipulations. +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, + 2003, 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. + +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <cfloat> + +#include <iostream> +#include <vector> + +#include "Array-util.h" +#include "byte-swap.h" +#include "fMatrix.h" +#include "floatDET.h" +#include "floatSCHUR.h" +#include "floatSVD.h" +#include "floatCHOL.h" +#include "f77-fcn.h" +#include "functor.h" +#include "lo-error.h" +#include "lo-ieee.h" +#include "lo-mappers.h" +#include "lo-utils.h" +#include "mx-base.h" +#include "mx-fm-fdm.h" +#include "mx-fdm-fm.h" +#include "mx-inlines.cc" +#include "oct-cmplx.h" +#include "quit.h" + +#if defined (HAVE_FFTW3) +#include "oct-fftw.h" +#endif + +// Fortran functions we call. + +extern "C" +{ + F77_RET_T + F77_FUNC (xilaenv, XILAENV) (const octave_idx_type&, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, + const octave_idx_type&, const octave_idx_type&, + octave_idx_type& + F77_CHAR_ARG_LEN_DECL F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgebal, SGEBAL) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type&, + octave_idx_type&, float*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgebak, SGEBAK) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, float*, + const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + + F77_RET_T + F77_FUNC (sgemm, SGEMM) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + const float&, const float*, const octave_idx_type&, + const float*, const octave_idx_type&, const float&, + float*, const octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgemv, SGEMV) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const float&, + const float*, const octave_idx_type&, const float*, + const octave_idx_type&, const float&, float*, + const octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (xsdot, XSDOT) (const octave_idx_type&, const float*, const octave_idx_type&, + const float*, const octave_idx_type&, float&); + + F77_RET_T + F77_FUNC (sgetrf, SGETRF) (const octave_idx_type&, const octave_idx_type&, float*, const octave_idx_type&, + octave_idx_type*, octave_idx_type&); + + F77_RET_T + F77_FUNC (sgetrs, SGETRS) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, const octave_idx_type&, + const float*, const octave_idx_type&, + const octave_idx_type*, float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgetri, SGETRI) (const octave_idx_type&, float*, const octave_idx_type&, const octave_idx_type*, + float*, const octave_idx_type&, octave_idx_type&); + + F77_RET_T + F77_FUNC (sgecon, SGECON) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, float*, + const octave_idx_type&, const float&, float&, + float*, octave_idx_type*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgelsy, SGELSY) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + float*, const octave_idx_type&, float*, + const octave_idx_type&, octave_idx_type*, float&, octave_idx_type&, + float*, const octave_idx_type&, octave_idx_type&); + + F77_RET_T + F77_FUNC (sgelsd, SGELSD) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + float*, const octave_idx_type&, float*, + const octave_idx_type&, float*, float&, octave_idx_type&, + float*, const octave_idx_type&, octave_idx_type*, + octave_idx_type&); + + F77_RET_T + F77_FUNC (spotrf, SPOTRF) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + float *, const octave_idx_type&, + octave_idx_type& F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (spocon, SPOCON) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + float*, const octave_idx_type&, const float&, + float&, float*, octave_idx_type*, + octave_idx_type& F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (spotrs, SPOTRS) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const octave_idx_type&, const float*, + const octave_idx_type&, float*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (strtri, STRTRI) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const float*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (strcon, STRCON) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const float*, const octave_idx_type&, float&, + float*, octave_idx_type*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (strtrs, STRTRS) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const octave_idx_type&, const float*, + const octave_idx_type&, float*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + // Note that the original complex fft routines were not written for + // float complex arguments. They have been modified by adding an + // implicit float precision (a-h,o-z) statement at the beginning of + // each subroutine. + + F77_RET_T + F77_FUNC (cffti, CFFTI) (const octave_idx_type&, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, FloatComplex*, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, FloatComplex*, FloatComplex*); + + F77_RET_T + F77_FUNC (slartg, SLARTG) (const float&, const float&, float&, + float&, float&); + + F77_RET_T + F77_FUNC (strsyl, STRSYL) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + const float*, const octave_idx_type&, const float*, + const octave_idx_type&, const float*, const octave_idx_type&, + float&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (xslange, XSLANGE) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const octave_idx_type&, const float*, + const octave_idx_type&, float*, float& + F77_CHAR_ARG_LEN_DECL); +} + +// Matrix class. + +FloatMatrix::FloatMatrix (const FloatRowVector& rv) + : MArray2<float> (1, rv.length (), 0.0) +{ + for (octave_idx_type i = 0; i < rv.length (); i++) + elem (0, i) = rv.elem (i); +} + +FloatMatrix::FloatMatrix (const FloatColumnVector& cv) + : MArray2<float> (cv.length (), 1, 0.0) +{ + for (octave_idx_type i = 0; i < cv.length (); i++) + elem (i, 0) = cv.elem (i); +} + +FloatMatrix::FloatMatrix (const FloatDiagMatrix& a) + : MArray2<float> (a.rows (), a.cols (), 0.0) +{ + for (octave_idx_type i = 0; i < a.length (); i++) + elem (i, i) = a.elem (i, i); +} + +// FIXME -- could we use a templated mixed-type copy function +// here? + +FloatMatrix::FloatMatrix (const boolMatrix& a) + : MArray2<float> (a.rows (), a.cols ()) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + for (octave_idx_type j = 0; j < a.cols (); j++) + elem (i, j) = a.elem (i, j); +} + +FloatMatrix::FloatMatrix (const charMatrix& a) + : MArray2<float> (a.rows (), a.cols ()) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + for (octave_idx_type j = 0; j < a.cols (); j++) + elem (i, j) = a.elem (i, j); +} + +bool +FloatMatrix::operator == (const FloatMatrix& a) const +{ + if (rows () != a.rows () || cols () != a.cols ()) + return false; + + return mx_inline_equal (data (), a.data (), length ()); +} + +bool +FloatMatrix::operator != (const FloatMatrix& a) const +{ + return !(*this == a); +} + +bool +FloatMatrix::is_symmetric (void) const +{ + if (is_square () && rows () > 0) + { + for (octave_idx_type i = 0; i < rows (); i++) + for (octave_idx_type j = i+1; j < cols (); j++) + if (elem (i, j) != elem (j, i)) + return false; + + return true; + } + + return false; +} + +FloatMatrix& +FloatMatrix::insert (const FloatMatrix& a, octave_idx_type r, octave_idx_type c) +{ + Array2<float>::insert (a, r, c); + return *this; +} + +FloatMatrix& +FloatMatrix::insert (const FloatRowVector& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r >= rows () || c < 0 || c + a_len > cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r, c+i) = a.elem (i); + } + + return *this; +} + +FloatMatrix& +FloatMatrix::insert (const FloatColumnVector& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r + a_len > rows () || c < 0 || c >= cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i, c) = a.elem (i); + } + + return *this; +} + +FloatMatrix& +FloatMatrix::insert (const FloatDiagMatrix& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (r < 0 || r + a_nr > rows () || c < 0 || c + a_nc > cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + fill (0.0, r, c, r + a_nr - 1, c + a_nc - 1); + + octave_idx_type a_len = a.length (); + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i, c+i) = a.elem (i, i); + } + + return *this; +} + +FloatMatrix& +FloatMatrix::fill (float val) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + make_unique (); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + xelem (i, j) = val; + } + + return *this; +} + +FloatMatrix& +FloatMatrix::fill (float val, octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (r1 < 0 || r2 < 0 || c1 < 0 || c2 < 0 + || r1 >= nr || r2 >= nr || c1 >= nc || c2 >= nc) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + if (r2 >= r1 && c2 >= c1) + { + make_unique (); + + for (octave_idx_type j = c1; j <= c2; j++) + for (octave_idx_type i = r1; i <= r2; i++) + xelem (i, j) = val; + } + + return *this; +} + +FloatMatrix +FloatMatrix::append (const FloatMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.rows ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return FloatMatrix (); + } + + octave_idx_type nc_insert = nc; + FloatMatrix retval (nr, nc + a.cols ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatMatrix +FloatMatrix::append (const FloatRowVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != 1) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return FloatMatrix (); + } + + octave_idx_type nc_insert = nc; + FloatMatrix retval (nr, nc + a.length ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatMatrix +FloatMatrix::append (const FloatColumnVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.length ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return FloatMatrix (); + } + + octave_idx_type nc_insert = nc; + FloatMatrix retval (nr, nc + 1); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatMatrix +FloatMatrix::append (const FloatDiagMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.rows ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatMatrix retval (nr, nc + a.cols ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatMatrix +FloatMatrix::stack (const FloatMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.cols ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return FloatMatrix (); + } + + octave_idx_type nr_insert = nr; + FloatMatrix retval (nr + a.rows (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatMatrix +FloatMatrix::stack (const FloatRowVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.length ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return FloatMatrix (); + } + + octave_idx_type nr_insert = nr; + FloatMatrix retval (nr + 1, nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatMatrix +FloatMatrix::stack (const FloatColumnVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != 1) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return FloatMatrix (); + } + + octave_idx_type nr_insert = nr; + FloatMatrix retval (nr + a.length (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatMatrix +FloatMatrix::stack (const FloatDiagMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.cols ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return FloatMatrix (); + } + + octave_idx_type nr_insert = nr; + FloatMatrix retval (nr + a.rows (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatMatrix +real (const FloatComplexMatrix& a) +{ + octave_idx_type a_len = a.length (); + FloatMatrix retval; + if (a_len > 0) + retval = FloatMatrix (mx_inline_real_dup (a.data (), a_len), + a.rows (), a.cols ()); + return retval; +} + +FloatMatrix +imag (const FloatComplexMatrix& a) +{ + octave_idx_type a_len = a.length (); + FloatMatrix retval; + if (a_len > 0) + retval = FloatMatrix (mx_inline_imag_dup (a.data (), a_len), + a.rows (), a.cols ()); + return retval; +} + +FloatMatrix +FloatMatrix::extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const +{ + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + octave_idx_type new_r = r2 - r1 + 1; + octave_idx_type new_c = c2 - c1 + 1; + + FloatMatrix result (new_r, new_c); + + for (octave_idx_type j = 0; j < new_c; j++) + for (octave_idx_type i = 0; i < new_r; i++) + result.xelem (i, j) = elem (r1+i, c1+j); + + return result; +} + +FloatMatrix +FloatMatrix::extract_n (octave_idx_type r1, octave_idx_type c1, octave_idx_type nr, octave_idx_type nc) const +{ + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + result.xelem (i, j) = elem (r1+i, c1+j); + + return result; +} + +// extract row or column i. + +FloatRowVector +FloatMatrix::row (octave_idx_type i) const +{ + octave_idx_type nc = cols (); + if (i < 0 || i >= rows ()) + { + (*current_liboctave_error_handler) ("invalid row selection"); + return FloatRowVector (); + } + + FloatRowVector retval (nc); + for (octave_idx_type j = 0; j < nc; j++) + retval.xelem (j) = elem (i, j); + + return retval; +} + +FloatColumnVector +FloatMatrix::column (octave_idx_type i) const +{ + octave_idx_type nr = rows (); + if (i < 0 || i >= cols ()) + { + (*current_liboctave_error_handler) ("invalid column selection"); + return FloatColumnVector (); + } + + FloatColumnVector retval (nr); + for (octave_idx_type j = 0; j < nr; j++) + retval.xelem (j) = elem (j, i); + + return retval; +} + +FloatMatrix +FloatMatrix::inverse (void) const +{ + octave_idx_type info; + float rcond; + MatrixType mattype (*this); + return inverse (mattype, info, rcond, 0, 0); +} + +FloatMatrix +FloatMatrix::inverse (octave_idx_type& info) const +{ + float rcond; + MatrixType mattype (*this); + return inverse (mattype, info, rcond, 0, 0); +} + +FloatMatrix +FloatMatrix::inverse (octave_idx_type& info, float& rcond, int force, + int calc_cond) const +{ + MatrixType mattype (*this); + return inverse (mattype, info, rcond, force, calc_cond); +} + +FloatMatrix +FloatMatrix::inverse (MatrixType& mattype) const +{ + octave_idx_type info; + float rcond; + return inverse (mattype, info, rcond, 0, 0); +} + +FloatMatrix +FloatMatrix::inverse (MatrixType &mattype, octave_idx_type& info) const +{ + float rcond; + return inverse (mattype, info, rcond, 0, 0); +} + +FloatMatrix +FloatMatrix::tinverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force, int calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != nc || nr == 0 || nc == 0) + (*current_liboctave_error_handler) ("inverse requires square matrix"); + else + { + int typ = mattype.type (); + char uplo = (typ == MatrixType::Lower ? 'L' : 'U'); + char udiag = 'N'; + retval = *this; + float *tmp_data = retval.fortran_vec (); + + F77_XFCN (strtri, STRTRI, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&udiag, 1), + nr, tmp_data, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + info = -1; + else if (calc_cond) + { + octave_idx_type dtrcon_info = 0; + char job = '1'; + + OCTAVE_LOCAL_BUFFER (float, work, 3 * nr); + OCTAVE_LOCAL_BUFFER (octave_idx_type, iwork, nr); + + F77_XFCN (strcon, STRCON, (F77_CONST_CHAR_ARG2 (&job, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&udiag, 1), + nr, tmp_data, nr, rcond, + work, iwork, dtrcon_info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (dtrcon_info != 0) + info = -1; + } + + if (info == -1 && ! force) + retval = *this; // Restore matrix contents. + } + + return retval; +} + + +FloatMatrix +FloatMatrix::finverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force, int calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != nc || nr == 0 || nc == 0) + (*current_liboctave_error_handler) ("inverse requires square matrix"); + else + { + Array<octave_idx_type> ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + retval = *this; + float *tmp_data = retval.fortran_vec (); + + Array<float> z(1); + octave_idx_type lwork = -1; + + // Query the optimum work array size. + F77_XFCN (sgetri, SGETRI, (nc, tmp_data, nr, pipvt, + z.fortran_vec (), lwork, info)); + + lwork = static_cast<octave_idx_type> (z(0)); + lwork = (lwork < 2 *nc ? 2*nc : lwork); + z.resize (lwork); + float *pz = z.fortran_vec (); + + info = 0; + + // Calculate the norm of the matrix, for later use. + float anorm = 0; + if (calc_cond) + anorm = retval.abs().sum().row(static_cast<octave_idx_type>(0)).max(); + + F77_XFCN (sgetrf, SGETRF, (nc, nc, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + info = -1; + else if (calc_cond) + { + octave_idx_type dgecon_info = 0; + + // Now calculate the condition number for non-singular matrix. + char job = '1'; + Array<octave_idx_type> iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + F77_XFCN (sgecon, SGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, piz, dgecon_info + F77_CHAR_ARG_LEN (1))); + + if (dgecon_info != 0) + info = -1; + } + + if (info == -1 && ! force) + retval = *this; // Restore matrix contents. + else + { + octave_idx_type dgetri_info = 0; + + F77_XFCN (sgetri, SGETRI, (nc, tmp_data, nr, pipvt, + pz, lwork, dgetri_info)); + + if (dgetri_info != 0) + info = -1; + } + + if (info != 0) + mattype.mark_as_rectangular(); + } + + return retval; +} + +FloatMatrix +FloatMatrix::inverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force, int calc_cond) const +{ + int typ = mattype.type (false); + FloatMatrix ret; + + if (typ == MatrixType::Unknown) + typ = mattype.type (*this); + + if (typ == MatrixType::Upper || typ == MatrixType::Lower) + ret = tinverse (mattype, info, rcond, force, calc_cond); + else + { + if (mattype.is_hermitian ()) + { + FloatCHOL chol (*this, info, calc_cond); + if (info == 0) + { + if (calc_cond) + rcond = chol.rcond (); + else + rcond = 1.0; + ret = chol.inverse (); + } + else + mattype.mark_as_unsymmetric (); + } + + if (!mattype.is_hermitian ()) + ret = finverse(mattype, info, rcond, force, calc_cond); + + if ((mattype.is_hermitian () || calc_cond) && rcond == 0.) + ret = FloatMatrix (rows (), columns (), octave_Float_Inf); + } + + return ret; +} + +FloatMatrix +FloatMatrix::pseudo_inverse (float tol) const +{ + FloatSVD result (*this, SVD::economy); + + FloatDiagMatrix S = result.singular_values (); + FloatMatrix U = result.left_singular_matrix (); + FloatMatrix V = result.right_singular_matrix (); + + FloatColumnVector sigma = S.diag (); + + octave_idx_type r = sigma.length () - 1; + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (tol <= 0.0) + { + if (nr > nc) + tol = nr * sigma.elem (0) * DBL_EPSILON; + else + tol = nc * sigma.elem (0) * DBL_EPSILON; + } + + while (r >= 0 && sigma.elem (r) < tol) + r--; + + if (r < 0) + return FloatMatrix (nc, nr, 0.0); + else + { + FloatMatrix Ur = U.extract (0, 0, nr-1, r); + FloatDiagMatrix D = FloatDiagMatrix (sigma.extract (0, r)) . inverse (); + FloatMatrix Vr = V.extract (0, 0, nc-1, r); + return Vr * D * Ur.transpose (); + } +} + +#if defined (HAVE_FFTW3) + +FloatComplexMatrix +FloatMatrix::fourier (void) const +{ + size_t nr = rows (); + size_t nc = cols (); + + FloatComplexMatrix retval (nr, nc); + + size_t npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + const float *in (fortran_vec ()); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::fft (in, out, npts, nsamples); + + return retval; +} + +FloatComplexMatrix +FloatMatrix::ifourier (void) const +{ + size_t nr = rows (); + size_t nc = cols (); + + FloatComplexMatrix retval (nr, nc); + + size_t npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + FloatComplexMatrix tmp (*this); + FloatComplex *in (tmp.fortran_vec ()); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::ifft (in, out, npts, nsamples); + + return retval; +} + +FloatComplexMatrix +FloatMatrix::fourier2d (void) const +{ + dim_vector dv(rows (), cols ()); + + const float *in = fortran_vec (); + FloatComplexMatrix retval (rows (), cols ()); + octave_fftw::fftNd (in, retval.fortran_vec (), 2, dv); + + return retval; +} + +FloatComplexMatrix +FloatMatrix::ifourier2d (void) const +{ + dim_vector dv(rows (), cols ()); + + FloatComplexMatrix retval (*this); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::ifftNd (out, out, 2, dv); + + return retval; +} + +#else + +FloatComplexMatrix +FloatMatrix::fourier (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array<FloatComplex> wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = FloatComplexMatrix (*this); + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + } + + return retval; +} + +FloatComplexMatrix +FloatMatrix::ifourier (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array<FloatComplex> wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = FloatComplexMatrix (*this); + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + } + + for (octave_idx_type j = 0; j < npts*nsamples; j++) + tmp_data[j] = tmp_data[j] / static_cast<float> (npts); + + return retval; +} + +FloatComplexMatrix +FloatMatrix::fourier2d (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array<FloatComplex> wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = FloatComplexMatrix (*this); + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + } + + npts = nc; + nsamples = nr; + nn = 4*npts+15; + + wsave.resize (nn); + pwsave = wsave.fortran_vec (); + + Array<FloatComplex> tmp (npts); + FloatComplex *prow = tmp.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + prow[i] = tmp_data[i*nr + j]; + + F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + tmp_data[i*nr + j] = prow[i]; + } + + return retval; +} + +FloatComplexMatrix +FloatMatrix::ifourier2d (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array<FloatComplex> wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = FloatComplexMatrix (*this); + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + } + + for (octave_idx_type j = 0; j < npts*nsamples; j++) + tmp_data[j] = tmp_data[j] / static_cast<float> (npts); + + npts = nc; + nsamples = nr; + nn = 4*npts+15; + + wsave.resize (nn); + pwsave = wsave.fortran_vec (); + + Array<FloatComplex> tmp (npts); + FloatComplex *prow = tmp.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + prow[i] = tmp_data[i*nr + j]; + + F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + tmp_data[i*nr + j] = prow[i] / static_cast<float> (npts); + } + + return retval; +} + +#endif + +FloatDET +FloatMatrix::determinant (void) const +{ + octave_idx_type info; + float rcond; + return determinant (info, rcond, 0); +} + +FloatDET +FloatMatrix::determinant (octave_idx_type& info) const +{ + float rcond; + return determinant (info, rcond, 0); +} + +FloatDET +FloatMatrix::determinant (octave_idx_type& info, float& rcond, int calc_cond) const +{ + FloatDET retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr == 0 || nc == 0) + { + retval = FloatDET (1.0, 0); + } + else + { + Array<octave_idx_type> ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + + info = 0; + + // Calculate the norm of the matrix, for later use. + float anorm = 0; + if (calc_cond) + anorm = atmp.abs().sum().row(static_cast<octave_idx_type>(0)).max(); + + F77_XFCN (sgetrf, SGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + { + info = -1; + retval = FloatDET (); + } + else + { + if (calc_cond) + { + // Now calc the condition number for non-singular matrix. + char job = '1'; + Array<float> z (4 * nc); + float *pz = z.fortran_vec (); + Array<octave_idx_type> iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (sgecon, SGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, piz, info + F77_CHAR_ARG_LEN (1))); + } + + if (info != 0) + { + info = -1; + retval = FloatDET (); + } + else + { + float c = 1.0; + int e = 0; + + for (octave_idx_type i = 0; i < nc; i++) + { + if (ipvt(i) != (i+1)) + c = -c; + + c *= atmp(i,i); + + if (c == 0.0) + break; + + while (fabs (c) < 0.5) + { + c *= 2.0; + e--; + } + + while (fabs (c) >= 2.0) + { + c /= 2.0; + e++; + } + } + + retval = FloatDET (c, e); + } + } + } + + return retval; +} + +FloatMatrix +FloatMatrix::utsolve (MatrixType &mattype, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || nc == 0 || b.cols () == 0) + retval = FloatMatrix (nc, b.cols (), 0.0); + else + { + volatile int typ = mattype.type (); + + if (typ == MatrixType::Permuted_Upper || + typ == MatrixType::Upper) + { + octave_idx_type b_nc = b.cols (); + rcond = 1.; + info = 0; + + if (typ == MatrixType::Permuted_Upper) + { + (*current_liboctave_error_handler) + ("permuted triangular matrix not implemented"); + } + else + { + const float *tmp_data = fortran_vec (); + + if (calc_cond) + { + char norm = '1'; + char uplo = 'U'; + char dia = 'N'; + + Array<float> z (3 * nc); + float *pz = z.fortran_vec (); + Array<octave_idx_type> iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (strcon, STRCON, (F77_CONST_CHAR_ARG2 (&norm, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, tmp_data, nr, rcond, + pz, piz, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + float *result = retval.fortran_vec (); + + char uplo = 'U'; + char trans = 'N'; + char dia = 'N'; + + F77_XFCN (strtrs, STRTRS, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&trans, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, b_nc, tmp_data, nr, + result, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + } + } + else + (*current_liboctave_error_handler) ("incorrect matrix type"); + } + + return retval; +} + +FloatMatrix +FloatMatrix::ltsolve (MatrixType &mattype, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || nc == 0 || b.cols () == 0) + retval = FloatMatrix (nc, b.cols (), 0.0); + else + { + volatile int typ = mattype.type (); + + if (typ == MatrixType::Permuted_Lower || + typ == MatrixType::Lower) + { + octave_idx_type b_nc = b.cols (); + rcond = 1.; + info = 0; + + if (typ == MatrixType::Permuted_Lower) + { + (*current_liboctave_error_handler) + ("permuted triangular matrix not implemented"); + } + else + { + const float *tmp_data = fortran_vec (); + + if (calc_cond) + { + char norm = '1'; + char uplo = 'L'; + char dia = 'N'; + + Array<float> z (3 * nc); + float *pz = z.fortran_vec (); + Array<octave_idx_type> iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (strcon, STRCON, (F77_CONST_CHAR_ARG2 (&norm, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, tmp_data, nr, rcond, + pz, piz, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + float *result = retval.fortran_vec (); + + char uplo = 'L'; + char trans = 'N'; + char dia = 'N'; + + F77_XFCN (strtrs, STRTRS, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&trans, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, b_nc, tmp_data, nr, + result, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + } + } + else + (*current_liboctave_error_handler) ("incorrect matrix type"); + } + + return retval; +} + +FloatMatrix +FloatMatrix::fsolve (MatrixType &mattype, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != nc || nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || b.cols () == 0) + retval = FloatMatrix (nc, b.cols (), 0.0); + else + { + volatile int typ = mattype.type (); + + // Calculate the norm of the matrix, for later use. + float anorm = -1.; + + if (typ == MatrixType::Hermitian) + { + info = 0; + char job = 'L'; + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + anorm = atmp.abs().sum().row(static_cast<octave_idx_type>(0)).max(); + + F77_XFCN (spotrf, SPOTRF, (F77_CONST_CHAR_ARG2 (&job, 1), nr, + tmp_data, nr, info + F77_CHAR_ARG_LEN (1))); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + { + info = -2; + + mattype.mark_as_unsymmetric (); + typ = MatrixType::Full; + } + else + { + if (calc_cond) + { + Array<float> z (3 * nc); + float *pz = z.fortran_vec (); + Array<octave_idx_type> iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (spocon, SPOCON, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, tmp_data, nr, anorm, + rcond, pz, piz, info + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + float *result = retval.fortran_vec (); + + octave_idx_type b_nc = b.cols (); + + F77_XFCN (spotrs, SPOTRS, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, b_nc, tmp_data, nr, + result, b.rows(), info + F77_CHAR_ARG_LEN (1))); + } + else + { + mattype.mark_as_unsymmetric (); + typ = MatrixType::Full; + } + } + } + + if (typ == MatrixType::Full) + { + info = 0; + + Array<octave_idx_type> ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + if(anorm < 0.) + anorm = atmp.abs().sum().row(static_cast<octave_idx_type>(0)).max(); + + Array<float> z (4 * nc); + float *pz = z.fortran_vec (); + Array<octave_idx_type> iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (sgetrf, SGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision"); + + mattype.mark_as_rectangular (); + } + else + { + if (calc_cond) + { + // Now calculate the condition number for + // non-singular matrix. + char job = '1'; + F77_XFCN (sgecon, SGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, piz, info + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + float *result = retval.fortran_vec (); + + octave_idx_type b_nc = b.cols (); + + char job = 'N'; + F77_XFCN (sgetrs, SGETRS, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, b_nc, tmp_data, nr, + pipvt, result, b.rows(), info + F77_CHAR_ARG_LEN (1))); + } + else + mattype.mark_as_rectangular (); + } + } + else if (typ != MatrixType::Hermitian) + (*current_liboctave_error_handler) ("incorrect matrix type"); + } + + return retval; +} + +FloatMatrix +FloatMatrix::solve (MatrixType &typ, const FloatMatrix& b) const +{ + octave_idx_type info; + float rcond; + return solve (typ, b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond) const +{ + return solve (typ, b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (MatrixType &mattype, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool singular_fallback) const +{ + FloatMatrix retval; + int typ = mattype.type (); + + if (typ == MatrixType::Unknown) + typ = mattype.type (*this); + + // Only calculate the condition number for LU/Cholesky + if (typ == MatrixType::Upper || typ == MatrixType::Permuted_Upper) + retval = utsolve (mattype, b, info, rcond, sing_handler, false); + else if (typ == MatrixType::Lower || typ == MatrixType::Permuted_Lower) + retval = ltsolve (mattype, b, info, rcond, sing_handler, false); + else if (typ == MatrixType::Full || typ == MatrixType::Hermitian) + retval = fsolve (mattype, b, info, rcond, sing_handler, true); + else if (typ != MatrixType::Rectangular) + { + (*current_liboctave_error_handler) ("unknown matrix type"); + return FloatMatrix (); + } + + // Rectangular or one of the above solvers flags a singular matrix + if (singular_fallback && mattype.type () == MatrixType::Rectangular) + { + octave_idx_type rank; + retval = lssolve (b, info, rank, rcond); + } + + return retval; +} + +FloatComplexMatrix +FloatMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b); +} + +FloatComplexMatrix +FloatMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info); +} + +FloatComplexMatrix +FloatMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b, octave_idx_type& info, + float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info, rcond); +} + +FloatComplexMatrix +FloatMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool singular_fallback) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info, rcond, sing_handler, singular_fallback); +} + +FloatColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatColumnVector& b) const +{ + octave_idx_type info; float rcond; + return solve (typ, b, info, rcond); +} + +FloatColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info) const +{ + float rcond; + return solve (typ, b, info, rcond); +} + +FloatColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatColumnVector& b, octave_idx_type& info, + float& rcond) const +{ + return solve (typ, b, info, rcond, 0); +} + +FloatColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatColumnVector& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler) const +{ + FloatMatrix tmp (b); + return solve (typ, tmp, info, rcond, sing_handler).column(static_cast<octave_idx_type> (0)); +} + +FloatComplexColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b); +} + +FloatComplexColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info); +} + +FloatComplexColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info, rcond); +} + +FloatComplexColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve(typ, b, info, rcond, sing_handler); +} + +FloatMatrix +FloatMatrix::solve (const FloatMatrix& b) const +{ + octave_idx_type info; + float rcond; + return solve (b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (const FloatMatrix& b, octave_idx_type& info) const +{ + float rcond; + return solve (b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (const FloatMatrix& b, octave_idx_type& info, float& rcond) const +{ + return solve (b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler) const +{ + MatrixType mattype (*this); + return solve (mattype, b, info, rcond, sing_handler); +} + +FloatComplexMatrix +FloatMatrix::solve (const FloatComplexMatrix& b) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b); +} + +FloatComplexMatrix +FloatMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info); +} + +FloatComplexMatrix +FloatMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info, rcond); +} + +FloatComplexMatrix +FloatMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info, rcond, sing_handler); +} + +FloatColumnVector +FloatMatrix::solve (const FloatColumnVector& b) const +{ + octave_idx_type info; float rcond; + return solve (b, info, rcond); +} + +FloatColumnVector +FloatMatrix::solve (const FloatColumnVector& b, octave_idx_type& info) const +{ + float rcond; + return solve (b, info, rcond); +} + +FloatColumnVector +FloatMatrix::solve (const FloatColumnVector& b, octave_idx_type& info, float& rcond) const +{ + return solve (b, info, rcond, 0); +} + +FloatColumnVector +FloatMatrix::solve (const FloatColumnVector& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + MatrixType mattype (*this); + return solve (mattype, b, info, rcond, sing_handler); +} + +FloatComplexColumnVector +FloatMatrix::solve (const FloatComplexColumnVector& b) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b); +} + +FloatComplexColumnVector +FloatMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info); +} + +FloatComplexColumnVector +FloatMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info, float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info, rcond); +} + +FloatComplexColumnVector +FloatMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info, rcond, sing_handler); +} + +FloatMatrix +FloatMatrix::lssolve (const FloatMatrix& b) const +{ + octave_idx_type info; + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatMatrix +FloatMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info) const +{ + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatMatrix +FloatMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatMatrix +FloatMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float &rcond) const +{ + FloatMatrix retval; + + octave_idx_type nrhs = b.cols (); + + octave_idx_type m = rows (); + octave_idx_type n = cols (); + + if (m != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (m == 0 || n == 0 || b.cols () == 0) + retval = FloatMatrix (n, b.cols (), 0.0); + else + { + volatile octave_idx_type minmn = (m < n ? m : n); + octave_idx_type maxmn = m > n ? m : n; + rcond = -1.0; + if (m != n) + { + retval = FloatMatrix (maxmn, nrhs, 0.0); + + for (octave_idx_type j = 0; j < nrhs; j++) + for (octave_idx_type i = 0; i < m; i++) + retval.elem (i, j) = b.elem (i, j); + } + else + retval = b; + + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + + float *pretval = retval.fortran_vec (); + Array<float> s (minmn); + float *ps = s.fortran_vec (); + + // Ask DGELSD what the dimension of WORK should be. + octave_idx_type lwork = -1; + + Array<float> work (1); + + octave_idx_type smlsiz; + F77_FUNC (xilaenv, XILAENV) (9, F77_CONST_CHAR_ARG2 ("SGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + 0, 0, 0, 0, smlsiz + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + octave_idx_type mnthr; + F77_FUNC (xilaenv, XILAENV) (6, F77_CONST_CHAR_ARG2 ("SGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + m, n, nrhs, -1, mnthr + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + // We compute the size of iwork because DGELSD in older versions + // of LAPACK does not return it on a query call. + float dminmn = static_cast<float> (minmn); + float dsmlsizp1 = static_cast<float> (smlsiz+1); +#if defined (HAVE_LOG2) + float tmp = log2 (dminmn / dsmlsizp1); +#else + float tmp = log (dminmn / dsmlsizp1) / log (2.0); +#endif + octave_idx_type nlvl = static_cast<octave_idx_type> (tmp) + 1; + if (nlvl < 0) + nlvl = 0; + + octave_idx_type liwork = 3 * minmn * nlvl + 11 * minmn; + if (liwork < 1) + liwork = 1; + Array<octave_idx_type> iwork (liwork); + octave_idx_type* piwork = iwork.fortran_vec (); + + F77_XFCN (sgelsd, SGELSD, (m, n, nrhs, tmp_data, m, pretval, maxmn, + ps, rcond, rank, work.fortran_vec (), + lwork, piwork, info)); + + // The workspace query is broken in at least LAPACK 3.0.0 + // through 3.1.1 when n >= mnthr. The obtuse formula below + // should provide sufficient workspace for DGELSD to operate + // efficiently. + if (n >= mnthr) + { + const octave_idx_type wlalsd + = 9*m + 2*m*smlsiz + 8*m*nlvl + m*nrhs + (smlsiz+1)*(smlsiz+1); + + octave_idx_type addend = m; + + if (2*m-4 > addend) + addend = 2*m-4; + + if (nrhs > addend) + addend = nrhs; + + if (n-3*m > addend) + addend = n-3*m; + + if (wlalsd > addend) + addend = wlalsd; + + const octave_idx_type lworkaround = 4*m + m*m + addend; + + if (work(0) < lworkaround) + work(0) = lworkaround; + } + else if (m >= n) + { + octave_idx_type lworkaround + = 12*n + 2*n*smlsiz + 8*n*nlvl + n*nrhs + (smlsiz+1)*(smlsiz+1); + + if (work(0) < lworkaround) + work(0) = lworkaround; + } + + lwork = static_cast<octave_idx_type> (work(0)); + work.resize (lwork); + + F77_XFCN (sgelsd, SGELSD, (m, n, nrhs, tmp_data, m, pretval, + maxmn, ps, rcond, rank, + work.fortran_vec (), lwork, + piwork, info)); + + if (rank < minmn) + (*current_liboctave_warning_handler) + ("dgelsd: rank deficient %dx%d matrix, rank = %d", m, n, rank); + if (s.elem (0) == 0.0) + rcond = 0.0; + else + rcond = s.elem (minmn - 1) / s.elem (0); + + retval.resize (n, nrhs); + } + + return retval; +} + +FloatComplexMatrix +FloatMatrix::lssolve (const FloatComplexMatrix& b) const +{ + FloatComplexMatrix tmp (*this); + octave_idx_type info; + octave_idx_type rank; + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexMatrix +FloatMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + octave_idx_type rank; + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexMatrix +FloatMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + FloatComplexMatrix tmp (*this); + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexMatrix +FloatMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.lssolve (b, info, rank, rcond); +} + +FloatColumnVector +FloatMatrix::lssolve (const FloatColumnVector& b) const +{ + octave_idx_type info; + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatColumnVector +FloatMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info) const +{ + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatColumnVector +FloatMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatColumnVector +FloatMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank, float &rcond) const +{ + FloatColumnVector retval; + + octave_idx_type nrhs = 1; + + octave_idx_type m = rows (); + octave_idx_type n = cols (); + + if (m != b.length ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (m == 0 || n == 0) + retval = FloatColumnVector (n, 0.0); + else + { + volatile octave_idx_type minmn = (m < n ? m : n); + octave_idx_type maxmn = m > n ? m : n; + rcond = -1.0; + + if (m != n) + { + retval = FloatColumnVector (maxmn, 0.0); + + for (octave_idx_type i = 0; i < m; i++) + retval.elem (i) = b.elem (i); + } + else + retval = b; + + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + + float *pretval = retval.fortran_vec (); + Array<float> s (minmn); + float *ps = s.fortran_vec (); + + // Ask DGELSD what the dimension of WORK should be. + octave_idx_type lwork = -1; + + Array<float> work (1); + + octave_idx_type smlsiz; + F77_FUNC (xilaenv, XILAENV) (9, F77_CONST_CHAR_ARG2 ("SGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + 0, 0, 0, 0, smlsiz + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + // We compute the size of iwork because DGELSD in older versions + // of LAPACK does not return it on a query call. + float dminmn = static_cast<float> (minmn); + float dsmlsizp1 = static_cast<float> (smlsiz+1); +#if defined (HAVE_LOG2) + float tmp = log2 (dminmn / dsmlsizp1); +#else + float tmp = log (dminmn / dsmlsizp1) / log (2.0); +#endif + octave_idx_type nlvl = static_cast<octave_idx_type> (tmp) + 1; + if (nlvl < 0) + nlvl = 0; + + octave_idx_type liwork = 3 * minmn * nlvl + 11 * minmn; + if (liwork < 1) + liwork = 1; + Array<octave_idx_type> iwork (liwork); + octave_idx_type* piwork = iwork.fortran_vec (); + + F77_XFCN (sgelsd, SGELSD, (m, n, nrhs, tmp_data, m, pretval, maxmn, + ps, rcond, rank, work.fortran_vec (), + lwork, piwork, info)); + + lwork = static_cast<octave_idx_type> (work(0)); + work.resize (lwork); + + F77_XFCN (sgelsd, SGELSD, (m, n, nrhs, tmp_data, m, pretval, + maxmn, ps, rcond, rank, + work.fortran_vec (), lwork, + piwork, info)); + + if (rank < minmn) + { + if (rank < minmn) + (*current_liboctave_warning_handler) + ("dgelsd: rank deficient %dx%d matrix, rank = %d", m, n, rank); + if (s.elem (0) == 0.0) + rcond = 0.0; + else + rcond = s.elem (minmn - 1) / s.elem (0); + } + + retval.resize (n, nrhs); + } + + return retval; +} + +FloatComplexColumnVector +FloatMatrix::lssolve (const FloatComplexColumnVector& b) const +{ + FloatComplexMatrix tmp (*this); + octave_idx_type info; + octave_idx_type rank; + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexColumnVector +FloatMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + octave_idx_type rank; + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexColumnVector +FloatMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + FloatComplexMatrix tmp (*this); + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexColumnVector +FloatMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info, + octave_idx_type& rank, float &rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.lssolve (b, info, rank, rcond); +} + +// Constants for matrix exponential calculation. + +static float padec [] = +{ + 5.0000000000000000e-1, + 1.1666666666666667e-1, + 1.6666666666666667e-2, + 1.6025641025641026e-3, + 1.0683760683760684e-4, + 4.8562548562548563e-6, + 1.3875013875013875e-7, + 1.9270852604185938e-9, +}; + +static void +solve_singularity_warning (float rcond) +{ + (*current_liboctave_warning_handler) + ("singular matrix encountered in expm calculation, rcond = %g", + rcond); +} + +FloatMatrix +FloatMatrix::expm (void) const +{ + FloatMatrix retval; + + FloatMatrix m = *this; + + if (numel () == 1) + return FloatMatrix (1, 1, exp (m(0))); + + octave_idx_type nc = columns (); + + // Preconditioning step 1: trace normalization to reduce dynamic + // range of poles, but avoid making stable eigenvalues unstable. + + // trace shift value + volatile float trshift = 0.0; + + for (octave_idx_type i = 0; i < nc; i++) + trshift += m.elem (i, i); + + trshift /= nc; + + if (trshift > 0.0) + { + for (octave_idx_type i = 0; i < nc; i++) + m.elem (i, i) -= trshift; + } + + // Preconditioning step 2: balancing; code follows development + // in AEPBAL + + float *p_m = m.fortran_vec (); + + octave_idx_type info, ilo, ihi, ilos, ihis; + Array<float> dpermute (nc); + Array<float> dscale (nc); + + // permutation first + char job = 'P'; + F77_XFCN (sgebal, SGEBAL, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, p_m, nc, ilo, ihi, + dpermute.fortran_vec (), info + F77_CHAR_ARG_LEN (1))); + + // then scaling + job = 'S'; + F77_XFCN (sgebal, SGEBAL, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, p_m, nc, ilos, ihis, + dscale.fortran_vec (), info + F77_CHAR_ARG_LEN (1))); + + // Preconditioning step 3: scaling. + + FloatColumnVector work(nc); + float inf_norm; + + F77_XFCN (xslange, XSLANGE, (F77_CONST_CHAR_ARG2 ("I", 1), + nc, nc, m.fortran_vec (), nc, + work.fortran_vec (), inf_norm + F77_CHAR_ARG_LEN (1))); + + octave_idx_type sqpow = static_cast<octave_idx_type> (inf_norm > 0.0 + ? (1.0 + log (inf_norm) / log (2.0)) + : 0.0); + + // Check whether we need to square at all. + + if (sqpow < 0) + sqpow = 0; + + if (sqpow > 0) + { + if (sqpow > 1023) + sqpow = 1023; + + float scale_factor = 1.0; + for (octave_idx_type i = 0; i < sqpow; i++) + scale_factor *= 2.0; + + m = m / scale_factor; + } + + // npp, dpp: pade' approx polynomial matrices. + + FloatMatrix npp (nc, nc, 0.0); + float *pnpp = npp.fortran_vec (); + FloatMatrix dpp = npp; + float *pdpp = dpp.fortran_vec (); + + // Now powers a^8 ... a^1. + + octave_idx_type minus_one_j = -1; + for (octave_idx_type j = 7; j >= 0; j--) + { + for (octave_idx_type i = 0; i < nc; i++) + { + octave_idx_type k = i * nc + i; + pnpp[k] += padec[j]; + pdpp[k] += minus_one_j * padec[j]; + } + + npp = m * npp; + pnpp = npp.fortran_vec (); + + dpp = m * dpp; + pdpp = dpp.fortran_vec (); + + minus_one_j *= -1; + } + + // Zero power. + + dpp = -dpp; + for (octave_idx_type j = 0; j < nc; j++) + { + npp.elem (j, j) += 1.0; + dpp.elem (j, j) += 1.0; + } + + // Compute pade approximation = inverse (dpp) * npp. + + float rcond; + retval = dpp.solve (npp, info, rcond, solve_singularity_warning); + + if (info < 0) + return retval; + + // Reverse preconditioning step 3: repeated squaring. + + while (sqpow) + { + retval = retval * retval; + sqpow--; + } + + // Reverse preconditioning step 2: inverse balancing. + // apply inverse scaling to computed exponential + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) *= dscale(i) / dscale(j); + + OCTAVE_QUIT; + + // construct balancing permutation vector + Array<octave_idx_type> iperm (nc); + for (octave_idx_type i = 0; i < nc; i++) + iperm(i) = i; // identity permutation + + // leading permutations in forward order + for (octave_idx_type i = 0; i < (ilo-1); i++) + { + octave_idx_type swapidx = static_cast<octave_idx_type> (dpermute(i)) - 1; + octave_idx_type tmp = iperm(i); + iperm(i) = iperm (swapidx); + iperm(swapidx) = tmp; + } + + // construct inverse balancing permutation vector + Array<octave_idx_type> invpvec (nc); + for (octave_idx_type i = 0; i < nc; i++) + invpvec(iperm(i)) = i; // Thanks to R. A. Lippert for this method + + OCTAVE_QUIT; + + FloatMatrix tmpMat = retval; + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) = tmpMat(invpvec(i),invpvec(j)); + + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < nc; i++) + iperm(i) = i; // identity permutation + + // trailing permutations must be done in reverse order + for (octave_idx_type i = nc - 1; i >= ihi; i--) + { + octave_idx_type swapidx = static_cast<octave_idx_type> (dpermute(i)) - 1; + octave_idx_type tmp = iperm(i); + iperm(i) = iperm(swapidx); + iperm(swapidx) = tmp; + } + + // construct inverse balancing permutation vector + for (octave_idx_type i = 0; i < nc; i++) + invpvec(iperm(i)) = i; // Thanks to R. A. Lippert for this method + + OCTAVE_QUIT; + + tmpMat = retval; + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) = tmpMat(invpvec(i),invpvec(j)); + + // Reverse preconditioning step 1: fix trace normalization. + + if (trshift > 0.0) + retval = expf (trshift) * retval; + + return retval; +} + +FloatMatrix& +FloatMatrix::operator += (const FloatDiagMatrix& a) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nr != a_nr || nc != a_nc) + { + gripe_nonconformant ("operator +=", nr, nc, a_nr, a_nc); + return *this; + } + + for (octave_idx_type i = 0; i < a.length (); i++) + elem (i, i) += a.elem (i, i); + + return *this; +} + +FloatMatrix& +FloatMatrix::operator -= (const FloatDiagMatrix& a) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nr != a_nr || nc != a_nc) + { + gripe_nonconformant ("operator -=", nr, nc, a_nr, a_nc); + return *this; + } + + for (octave_idx_type i = 0; i < a.length (); i++) + elem (i, i) -= a.elem (i, i); + + return *this; +} + +// unary operations + +boolMatrix +FloatMatrix::operator ! (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + boolMatrix b (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + b.elem (i, j) = ! elem (i, j); + + return b; +} + +// column vector by row vector -> matrix operations + +FloatMatrix +operator * (const FloatColumnVector& v, const FloatRowVector& a) +{ + FloatMatrix retval; + + octave_idx_type len = v.length (); + + if (len != 0) + { + octave_idx_type a_len = a.length (); + + retval.resize (len, a_len); + float *c = retval.fortran_vec (); + + F77_XFCN (sgemm, SGEMM, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 ("N", 1), + len, a_len, 1, 1.0, v.data (), len, + a.data (), 1, 0.0, c, len + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + + return retval; +} + +// other operations. + +FloatMatrix +FloatMatrix::map (dmapper fcn) const +{ + return MArray2<float>::map<float> (func_ptr (fcn)); +} + +FloatComplexMatrix +FloatMatrix::map (cmapper fcn) const +{ + return MArray2<float>::map<FloatComplex> (func_ptr (fcn)); +} + +boolMatrix +FloatMatrix::map (bmapper fcn) const +{ + return MArray2<float>::map<bool> (func_ptr (fcn)); +} + +bool +FloatMatrix::any_element_is_negative (bool neg_zero) const +{ + octave_idx_type nel = nelem (); + + if (neg_zero) + { + for (octave_idx_type i = 0; i < nel; i++) + if (lo_ieee_signbit (elem (i))) + return true; + } + else + { + for (octave_idx_type i = 0; i < nel; i++) + if (elem (i) < 0) + return true; + } + + return false; +} + + +bool +FloatMatrix::any_element_is_inf_or_nan (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + if (xisinf (val) || xisnan (val)) + return true; + } + + return false; +} + +bool +FloatMatrix::any_element_not_one_or_zero (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + if (val != 0 && val != 1) + return true; + } + + return false; +} + +bool +FloatMatrix::all_elements_are_int_or_inf_or_nan (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + if (xisnan (val) || D_NINT (val) == val) + continue; + else + return false; + } + + return true; +} + +// Return nonzero if any element of M is not an integer. Also extract +// the largest and smallest values and return them in MAX_VAL and MIN_VAL. + +bool +FloatMatrix::all_integers (float& max_val, float& min_val) const +{ + octave_idx_type nel = nelem (); + + if (nel > 0) + { + max_val = elem (0); + min_val = elem (0); + } + else + return false; + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + + if (val > max_val) + max_val = val; + + if (val < min_val) + min_val = val; + + if (D_NINT (val) != val) + return false; + } + + return true; +} + +bool +FloatMatrix::too_large_for_float (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + + if (! (xisnan (val) || xisinf (val)) + && fabs (val) > FLT_MAX) + return true; + } + + return false; +} + +// FIXME Do these really belong here? Maybe they should be +// in a base class? + +boolMatrix +FloatMatrix::all (int dim) const +{ + MX_ALL_OP (dim); +} + +boolMatrix +FloatMatrix::any (int dim) const +{ + MX_ANY_OP (dim); +} + +FloatMatrix +FloatMatrix::cumprod (int dim) const +{ + MX_CUMULATIVE_OP (FloatMatrix, float, *=); +} + +FloatMatrix +FloatMatrix::cumsum (int dim) const +{ + MX_CUMULATIVE_OP (FloatMatrix, float, +=); +} + +FloatMatrix +FloatMatrix::prod (int dim) const +{ + MX_REDUCTION_OP (FloatMatrix, *=, 1.0, 1.0); +} + +FloatMatrix +FloatMatrix::sum (int dim) const +{ + MX_REDUCTION_OP (FloatMatrix, +=, 0.0, 0.0); +} + +FloatMatrix +FloatMatrix::sumsq (int dim) const +{ +#define ROW_EXPR \ + float d = elem (i, j); \ + retval.elem (i, 0) += d * d + +#define COL_EXPR \ + float d = elem (i, j); \ + retval.elem (0, j) += d * d + + MX_BASE_REDUCTION_OP (FloatMatrix, ROW_EXPR, COL_EXPR, 0.0, 0.0); + +#undef ROW_EXPR +#undef COL_EXPR +} + +FloatMatrix +FloatMatrix::abs (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + FloatMatrix retval (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval (i, j) = fabs (elem (i, j)); + + return retval; +} + +FloatMatrix +FloatMatrix::diag (octave_idx_type k) const +{ + return MArray2<float>::diag (k); +} + +FloatColumnVector +FloatMatrix::row_min (void) const +{ + Array<octave_idx_type> dummy_idx; + return row_min (dummy_idx); +} + +FloatColumnVector +FloatMatrix::row_min (Array<octave_idx_type>& idx_arg) const +{ + FloatColumnVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nr); + idx_arg.resize (nr); + + for (octave_idx_type i = 0; i < nr; i++) + { + octave_idx_type idx_j; + + float tmp_min = octave_Float_NaN; + + for (idx_j = 0; idx_j < nc; idx_j++) + { + tmp_min = elem (i, idx_j); + + if (! xisnan (tmp_min)) + break; + } + + for (octave_idx_type j = idx_j+1; j < nc; j++) + { + float tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + else if (tmp < tmp_min) + { + idx_j = j; + tmp_min = tmp; + } + } + + result.elem (i) = tmp_min; + idx_arg.elem (i) = xisnan (tmp_min) ? 0 : idx_j; + } + } + + return result; +} + +FloatColumnVector +FloatMatrix::row_max (void) const +{ + Array<octave_idx_type> dummy_idx; + return row_max (dummy_idx); +} + +FloatColumnVector +FloatMatrix::row_max (Array<octave_idx_type>& idx_arg) const +{ + FloatColumnVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nr); + idx_arg.resize (nr); + + for (octave_idx_type i = 0; i < nr; i++) + { + octave_idx_type idx_j; + + float tmp_max = octave_Float_NaN; + + for (idx_j = 0; idx_j < nc; idx_j++) + { + tmp_max = elem (i, idx_j); + + if (! xisnan (tmp_max)) + break; + } + + for (octave_idx_type j = idx_j+1; j < nc; j++) + { + float tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + else if (tmp > tmp_max) + { + idx_j = j; + tmp_max = tmp; + } + } + + result.elem (i) = tmp_max; + idx_arg.elem (i) = xisnan (tmp_max) ? 0 : idx_j; + } + } + + return result; +} + +FloatRowVector +FloatMatrix::column_min (void) const +{ + Array<octave_idx_type> dummy_idx; + return column_min (dummy_idx); +} + +FloatRowVector +FloatMatrix::column_min (Array<octave_idx_type>& idx_arg) const +{ + FloatRowVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nc); + idx_arg.resize (nc); + + for (octave_idx_type j = 0; j < nc; j++) + { + octave_idx_type idx_i; + + float tmp_min = octave_Float_NaN; + + for (idx_i = 0; idx_i < nr; idx_i++) + { + tmp_min = elem (idx_i, j); + + if (! xisnan (tmp_min)) + break; + } + + for (octave_idx_type i = idx_i+1; i < nr; i++) + { + float tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + else if (tmp < tmp_min) + { + idx_i = i; + tmp_min = tmp; + } + } + + result.elem (j) = tmp_min; + idx_arg.elem (j) = xisnan (tmp_min) ? 0 : idx_i; + } + } + + return result; +} + +FloatRowVector +FloatMatrix::column_max (void) const +{ + Array<octave_idx_type> dummy_idx; + return column_max (dummy_idx); +} + +FloatRowVector +FloatMatrix::column_max (Array<octave_idx_type>& idx_arg) const +{ + FloatRowVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nc); + idx_arg.resize (nc); + + for (octave_idx_type j = 0; j < nc; j++) + { + octave_idx_type idx_i; + + float tmp_max = octave_Float_NaN; + + for (idx_i = 0; idx_i < nr; idx_i++) + { + tmp_max = elem (idx_i, j); + + if (! xisnan (tmp_max)) + break; + } + + for (octave_idx_type i = idx_i+1; i < nr; i++) + { + float tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + else if (tmp > tmp_max) + { + idx_i = i; + tmp_max = tmp; + } + } + + result.elem (j) = tmp_max; + idx_arg.elem (j) = xisnan (tmp_max) ? 0 : idx_i; + } + } + + return result; +} + +std::ostream& +operator << (std::ostream& os, const FloatMatrix& a) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + { + for (octave_idx_type j = 0; j < a.cols (); j++) + { + os << " "; + octave_write_float (os, a.elem (i, j)); + } + os << "\n"; + } + return os; +} + +std::istream& +operator >> (std::istream& is, FloatMatrix& a) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + if (nr < 1 || nc < 1) + is.clear (std::ios::badbit); + else + { + float tmp; + for (octave_idx_type i = 0; i < nr; i++) + for (octave_idx_type j = 0; j < nc; j++) + { + tmp = octave_read_float (is); + if (is) + a.elem (i, j) = tmp; + else + goto done; + } + } + + done: + + return is; +} + +FloatMatrix +Givens (float x, float y) +{ + float cc, s, temp_r; + + F77_FUNC (slartg, SLARTG) (x, y, cc, s, temp_r); + + FloatMatrix g (2, 2); + + g.elem (0, 0) = cc; + g.elem (1, 1) = cc; + g.elem (0, 1) = s; + g.elem (1, 0) = -s; + + return g; +} + +FloatMatrix +Sylvester (const FloatMatrix& a, const FloatMatrix& b, const FloatMatrix& c) +{ + FloatMatrix retval; + + // FIXME -- need to check that a, b, and c are all the same + // size. + + // Compute Schur decompositions. + + FloatSCHUR as (a, "U"); + FloatSCHUR bs (b, "U"); + + // Transform c to new coordinates. + + FloatMatrix ua = as.unitary_matrix (); + FloatMatrix sch_a = as.schur_matrix (); + + FloatMatrix ub = bs.unitary_matrix (); + FloatMatrix sch_b = bs.schur_matrix (); + + FloatMatrix cx = ua.transpose () * c * ub; + + // Solve the sylvester equation, back-transform, and return the + // solution. + + octave_idx_type a_nr = a.rows (); + octave_idx_type b_nr = b.rows (); + + float scale; + octave_idx_type info; + + float *pa = sch_a.fortran_vec (); + float *pb = sch_b.fortran_vec (); + float *px = cx.fortran_vec (); + + F77_XFCN (strsyl, STRSYL, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 ("N", 1), + 1, a_nr, b_nr, pa, a_nr, pb, + b_nr, px, a_nr, scale, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + + // FIXME -- check info? + + retval = -ua*cx*ub.transpose (); + + return retval; +} + +// matrix by matrix -> matrix operations + +/* Simple Dot Product, Matrix-Vector and Matrix-Matrix Unit tests +%!assert([1 2 3] * [ 4 ; 5 ; 6], 32, 1e-14) +%!assert([1 2 ; 3 4 ] * [5 ; 6], [17 ; 39 ], 1e-14) +%!assert([1 2 ; 3 4 ] * [5 6 ; 7 8], [19 22; 43 50], 1e-14) +*/ + +/* Test some simple identities +%!shared M, cv, rv +%! M = randn(10,10); +%! cv = randn(10,1); +%! rv = randn(1,10); +%!assert([M*cv,M*cv],M*[cv,cv],1e-14) +%!assert([rv*M;rv*M],[rv;rv]*M,1e-14) +%!assert(2*rv*cv,[rv,rv]*[cv;cv],1e-14) +*/ + + +FloatMatrix +operator * (const FloatMatrix& m, const FloatMatrix& a) +{ + FloatMatrix retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nc != a_nr) + gripe_nonconformant ("operator *", nr, nc, a_nr, a_nc); + else + { + if (nr == 0 || nc == 0 || a_nc == 0) + retval.resize (nr, a_nc, 0.0); + else + { + octave_idx_type ld = nr; + octave_idx_type lda = a_nr; + + retval.resize (nr, a_nc); + float *c = retval.fortran_vec (); + + if (a_nc == 1) + { + if (nr == 1) + F77_FUNC (xsdot, XSDOT) (nc, m.data (), 1, a.data (), 1, *c); + else + { + F77_XFCN (sgemv, SGEMV, (F77_CONST_CHAR_ARG2 ("N", 1), + nr, nc, 1.0, m.data (), ld, + a.data (), 1, 0.0, c, 1 + F77_CHAR_ARG_LEN (1))); + } + } + else + { + F77_XFCN (sgemm, SGEMM, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 ("N", 1), + nr, a_nc, nc, 1.0, m.data (), + ld, a.data (), lda, 0.0, c, nr + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + } + } + + return retval; +} + +// FIXME -- it would be nice to share code among the min/max +// functions below. + +#define EMPTY_RETURN_CHECK(T) \ + if (nr == 0 || nc == 0) \ + return T (nr, nc); + +FloatMatrix +min (float d, const FloatMatrix& m) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatMatrix); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmin (d, m (i, j)); + } + + return result; +} + +FloatMatrix +min (const FloatMatrix& m, float d) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatMatrix); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmin (m (i, j), d); + } + + return result; +} + +FloatMatrix +min (const FloatMatrix& a, const FloatMatrix& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.columns (); + + if (nr != b.rows () || nc != b.columns ()) + { + (*current_liboctave_error_handler) + ("two-arg min expecting args of same size"); + return FloatMatrix (); + } + + EMPTY_RETURN_CHECK (FloatMatrix); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmin (a (i, j), b (i, j)); + } + + return result; +} + +FloatMatrix +max (float d, const FloatMatrix& m) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatMatrix); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (d, m (i, j)); + } + + return result; +} + +FloatMatrix +max (const FloatMatrix& m, float d) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatMatrix); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (m (i, j), d); + } + + return result; +} + +FloatMatrix +max (const FloatMatrix& a, const FloatMatrix& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.columns (); + + if (nr != b.rows () || nc != b.columns ()) + { + (*current_liboctave_error_handler) + ("two-arg max expecting args of same size"); + return FloatMatrix (); + } + + EMPTY_RETURN_CHECK (FloatMatrix); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (a (i, j), b (i, j)); + } + + return result; +} + +MS_CMP_OPS(FloatMatrix, , float, ) +MS_BOOL_OPS(FloatMatrix, float, 0.0) + +SM_CMP_OPS(float, , FloatMatrix, ) +SM_BOOL_OPS(float, FloatMatrix, 0.0) + +MM_CMP_OPS(FloatMatrix, , FloatMatrix, ) +MM_BOOL_OPS(FloatMatrix, FloatMatrix, 0.0) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/